265 files changed, 11253 insertions, 14749 deletions
diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt
index 6684978318..653ebd46cd 100644
--- a/COPYRIGHT.txt
+++ b/COPYRIGHT.txt
@@ -134,10 +134,10 @@ Comment: ENet
 Copyright: 2002-2020, Lee Salzman
 License: Expat
 
-Files: ./thirdparty/etc2comp/
-Comment: Etc2Comp
-Copyright: 2015, Etc2Comp Authors
-License: Apache-2.0
+Files: ./thirdparty/etcpak/
+Comment: etcpak
+Copyright: 2013-2021, Bartosz Taudul
+License: BSD-3-clause
 
 Files: ./thirdparty/fonts/DroidSans*.ttf
 Comment: DroidSans font
diff --git a/core/core_bind.cpp b/core/core_bind.cpp
index c3d547c2c7..84d8d0d4d3 100644
--- a/core/core_bind.cpp
+++ b/core/core_bind.cpp
@@ -1334,7 +1334,7 @@ Vector<uint8_t> _File::get_buffer(int p_length) const {
 	ERR_FAIL_COND_V(len < 0, Vector<uint8_t>());
 
 	if (len < p_length) {
-		data.resize(p_length);
+		data.resize(len);
 	}
 
 	return data;
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 05c80296c2..cf0040353d 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -3772,9 +3772,9 @@ String String::uri_encode() const {
 		} else {
 			char h_Val[3];
 #if defined(__GNUC__) || defined(_MSC_VER)
-			snprintf(h_Val, 3, "%hhX", ord);
+			snprintf(h_Val, 3, "%02hhX", ord);
 #else
-			sprintf(h_Val, "%hhX", ord);
+			sprintf(h_Val, "%02hhX", ord);
 #endif
 			res += "%";
 			res += h_Val;
diff --git a/core/variant/binder_common.h b/core/variant/binder_common.h
index 8c0b7907e3..86bbf43266 100644
--- a/core/variant/binder_common.h
+++ b/core/variant/binder_common.h
@@ -233,6 +233,11 @@ void call_with_ptr_args_retc_helper(T *p_instance, R (T::*p_method)(P...) const,
 	PtrToArg<R>::encode((p_instance->*p_method)(PtrToArg<P>::convert(p_args[Is])...), r_ret);
 }
 
+template <class T, class... P, size_t... Is>
+void call_with_ptr_args_static_helper(T *p_instance, void (*p_method)(T *, P...), const void **p_args, IndexSequence<Is...>) {
+	p_method(p_instance, PtrToArg<P>::convert(p_args[Is])...);
+}
+
 template <class T, class R, class... P, size_t... Is>
 void call_with_ptr_args_static_retc_helper(T *p_instance, R (*p_method)(T *, P...), const void **p_args, void *r_ret, IndexSequence<Is...>) {
 	PtrToArg<R>::encode(p_method(p_instance, PtrToArg<P>::convert(p_args[Is])...), r_ret);
@@ -273,6 +278,11 @@ void call_with_validated_variant_args_static_retc_helper(T *p_instance, R (*p_me
 	VariantInternalAccessor<typename GetSimpleTypeT<R>::type_t>::set(r_ret, p_method(p_instance, (VariantInternalAccessor<typename GetSimpleTypeT<P>::type_t>::get(p_args[Is]))...));
 }
 
+template <class T, class... P, size_t... Is>
+void call_with_validated_variant_args_static_helper(T *p_instance, void (*p_method)(T *, P...), const Variant **p_args, IndexSequence<Is...>) {
+	p_method(p_instance, (VariantInternalAccessor<typename GetSimpleTypeT<P>::type_t>::get(p_args[Is]))...);
+}
+
 template <class R, class... P, size_t... Is>
 void call_with_validated_variant_args_static_method_ret_helper(R (*p_method)(P...), const Variant **p_args, Variant *r_ret, IndexSequence<Is...>) {
 	VariantInternalAccessor<typename GetSimpleTypeT<R>::type_t>::set(r_ret, p_method((VariantInternalAccessor<typename GetSimpleTypeT<P>::type_t>::get(p_args[Is]))...));
@@ -471,6 +481,11 @@ void call_with_ptr_args_retc(T *p_instance, R (T::*p_method)(P...) const, const
 	call_with_ptr_args_retc_helper<T, R, P...>(p_instance, p_method, p_args, r_ret, BuildIndexSequence<sizeof...(P)>{});
 }
 
+template <class T, class... P>
+void call_with_ptr_args_static(T *p_instance, void (*p_method)(T *, P...), const void **p_args) {
+	call_with_ptr_args_static_helper<T, P...>(p_instance, p_method, p_args, BuildIndexSequence<sizeof...(P)>{});
+}
+
 template <class T, class R, class... P>
 void call_with_ptr_args_static_retc(T *p_instance, R (*p_method)(T *, P...), const void **p_args, void *r_ret) {
 	call_with_ptr_args_static_retc_helper<T, R, P...>(p_instance, p_method, p_args, r_ret, BuildIndexSequence<sizeof...(P)>{});
@@ -501,6 +516,11 @@ void call_with_validated_variant_args_retc(Variant *base, R (T::*p_method)(P...)
 	call_with_validated_variant_args_retc_helper<T, R, P...>(VariantGetInternalPtr<T>::get_ptr(base), p_method, p_args, r_ret, BuildIndexSequence<sizeof...(P)>{});
 }
 
+template <class T, class... P>
+void call_with_validated_variant_args_static(Variant *base, void (*p_method)(T *, P...), const Variant **p_args) {
+	call_with_validated_variant_args_static_helper<T, P...>(VariantGetInternalPtr<T>::get_ptr(base), p_method, p_args, BuildIndexSequence<sizeof...(P)>{});
+}
+
 template <class T, class R, class... P>
 void call_with_validated_variant_args_static_retc(Variant *base, R (*p_method)(T *, P...), const Variant **p_args, Variant *r_ret) {
 	call_with_validated_variant_args_static_retc_helper<T, R, P...>(VariantGetInternalPtr<T>::get_ptr(base), p_method, p_args, r_ret, BuildIndexSequence<sizeof...(P)>{});
@@ -758,6 +778,52 @@ void call_with_variant_args_retc_static_helper_dv(T *p_instance, R (*p_method)(T
 	call_with_variant_args_retc_static_helper(p_instance, p_method, args, r_ret, r_error, BuildIndexSequence<sizeof...(P)>{});
 }
 
+template <class T, class... P, size_t... Is>
+void call_with_variant_args_static_helper(T *p_instance, void (*p_method)(T *, P...), const Variant **p_args, Callable::CallError &r_error, IndexSequence<Is...>) {
+	r_error.error = Callable::CallError::CALL_OK;
+
+#ifdef DEBUG_METHODS_ENABLED
+	(p_method)(p_instance, VariantCasterAndValidate<P>::cast(p_args, Is, r_error)...);
+#else
+	(p_method)(p_instance, VariantCaster<P>::cast(*p_args[Is])...);
+#endif
+
+	(void)p_args;
+}
+
+template <class T, class... P>
+void call_with_variant_args_static_helper_dv(T *p_instance, void (*p_method)(T *, P...), const Variant **p_args, int p_argcount, const Vector<Variant> &default_values, Callable::CallError &r_error) {
+#ifdef DEBUG_ENABLED
+	if ((size_t)p_argcount > sizeof...(P)) {
+		r_error.error = Callable::CallError::CALL_ERROR_TOO_MANY_ARGUMENTS;
+		r_error.argument = sizeof...(P);
+		return;
+	}
+#endif
+
+	int32_t missing = (int32_t)sizeof...(P) - (int32_t)p_argcount;
+
+	int32_t dvs = default_values.size();
+#ifdef DEBUG_ENABLED
+	if (missing > dvs) {
+		r_error.error = Callable::CallError::CALL_ERROR_TOO_FEW_ARGUMENTS;
+		r_error.argument = sizeof...(P);
+		return;
+	}
+#endif
+
+	const Variant *args[sizeof...(P) == 0 ? 1 : sizeof...(P)]; //avoid zero sized array
+	for (int32_t i = 0; i < (int32_t)sizeof...(P); i++) {
+		if (i < p_argcount) {
+			args[i] = p_args[i];
+		} else {
+			args[i] = &default_values[i - p_argcount + (dvs - missing)];
+		}
+	}
+
+	call_with_variant_args_static_helper(p_instance, p_method, args, r_error, BuildIndexSequence<sizeof...(P)>{});
+}
+
 template <class R, class... P>
 void call_with_variant_args_static_ret_dv(R (*p_method)(P...), const Variant **p_args, int p_argcount, Variant &r_ret, Callable::CallError &r_error, const Vector<Variant> &default_values) {
 #ifdef DEBUG_ENABLED
diff --git a/core/variant/variant_call.cpp b/core/variant/variant_call.cpp
index 8f2d252810..61f3f7d82e 100644
--- a/core/variant/variant_call.cpp
+++ b/core/variant/variant_call.cpp
@@ -34,6 +34,7 @@
 #include "core/crypto/crypto_core.h"
 #include "core/debugger/engine_debugger.h"
 #include "core/io/compression.h"
+#include "core/io/marshalls.h"
 #include "core/object/class_db.h"
 #include "core/os/os.h"
 #include "core/templates/local_vector.h"
@@ -73,6 +74,16 @@ static _FORCE_INLINE_ void vc_method_call(void (T::*method)(P...) const, Variant
 }
 
 template <class R, class T, class... P>
+static _FORCE_INLINE_ void vc_method_call_static(R (*method)(T *, P...), Variant *base, const Variant **p_args, int p_argcount, Variant &r_ret, const Vector<Variant> &p_defvals, Callable::CallError &r_error) {
+	call_with_variant_args_retc_static_helper_dv(VariantGetInternalPtr<T>::get_ptr(base), method, p_args, p_argcount, r_ret, p_defvals, r_error);
+}
+
+template <class T, class... P>
+static _FORCE_INLINE_ void vc_method_call_static(void (*method)(T *, P...), Variant *base, const Variant **p_args, int p_argcount, Variant &r_ret, const Vector<Variant> &p_defvals, Callable::CallError &r_error) {
+	call_with_variant_args_static_helper_dv(VariantGetInternalPtr<T>::get_ptr(base), method, p_args, p_argcount, p_defvals, r_error);
+}
+
+template <class R, class T, class... P>
 static _FORCE_INLINE_ void vc_validated_call(R (T::*method)(P...), Variant *base, const Variant **p_args, Variant *r_ret) {
 	call_with_validated_variant_args_ret(base, method, p_args, r_ret);
 }
@@ -91,6 +102,16 @@ static _FORCE_INLINE_ void vc_validated_call(void (T::*method)(P...) const, Vari
 	call_with_validated_variant_argsc(base, method, p_args);
 }
 
+template <class R, class T, class... P>
+static _FORCE_INLINE_ void vc_validated_call_static(R (*method)(T *, P...), Variant *base, const Variant **p_args, Variant *r_ret) {
+	call_with_validated_variant_args_static_retc(base, method, p_args, r_ret);
+}
+
+template <class T, class... P>
+static _FORCE_INLINE_ void vc_validated_call_static(void (*method)(T *, P...), Variant *base, const Variant **p_args, Variant *r_ret) {
+	call_with_validated_variant_args_static(base, method, p_args);
+}
+
 template <class R, class... P>
 static _FORCE_INLINE_ void vc_validated_static_call(R (*method)(P...), const Variant **p_args, Variant *r_ret) {
 	call_with_validated_variant_args_static_method_ret(method, p_args, r_ret);
@@ -146,6 +167,11 @@ static _FORCE_INLINE_ void vc_change_return_type(R (*method)(P...), Variant *v)
 	VariantTypeAdjust<R>::adjust(v);
 }
 
+template <class... P>
+static _FORCE_INLINE_ void vc_change_return_type(void (*method)(P...), Variant *v) {
+	VariantInternal::clear(v);
+}
+
 template <class R, class T, class... P>
 static _FORCE_INLINE_ int vc_get_argument_count(R (T::*method)(P...)) {
 	return sizeof...(P);
@@ -229,6 +255,11 @@ static _FORCE_INLINE_ Variant::Type vc_get_return_type(R (*method)(P...)) {
 	return GetTypeInfo<R>::VARIANT_TYPE;
 }
 
+template <class... P>
+static _FORCE_INLINE_ Variant::Type vc_get_return_type(void (*method)(P...)) {
+	return Variant::NIL;
+}
+
 template <class R, class T, class... P>
 static _FORCE_INLINE_ bool vc_has_return_type(R (T::*method)(P...)) {
 	return true;
@@ -393,45 +424,50 @@ static _FORCE_INLINE_ void vc_ptrcall(R (*method)(T *, P...), void *p_base, cons
 	call_with_ptr_args_static_retc<T, R, P...>(reinterpret_cast<T *>(p_base), method, p_args, r_ret);
 }
 
-#define FUNCTION_CLASS(m_class, m_method_name, m_method_ptr)                                                                                                          \
-	struct Method_##m_class##_##m_method_name {                                                                                                                       \
-		static void call(Variant *base, const Variant **p_args, int p_argcount, Variant &r_ret, const Vector<Variant> &p_defvals, Callable::CallError &r_error) {     \
-			call_with_variant_args_retc_static_helper_dv(VariantGetInternalPtr<m_class>::get_ptr(base), m_method_ptr, p_args, p_argcount, r_ret, p_defvals, r_error); \
-		}                                                                                                                                                             \
-		static void validated_call(Variant *base, const Variant **p_args, int p_argcount, Variant *r_ret) {                                                           \
-			vc_change_return_type(m_method_ptr, r_ret);                                                                                                               \
-			call_with_validated_variant_args_static_retc(base, m_method_ptr, p_args, r_ret);                                                                          \
-		}                                                                                                                                                             \
-		static void ptrcall(void *p_base, const void **p_args, void *r_ret, int p_argcount) {                                                                         \
-			vc_ptrcall(m_method_ptr, p_base, p_args, r_ret);                                                                                                          \
-		}                                                                                                                                                             \
-		static int get_argument_count() {                                                                                                                             \
-			return vc_get_argument_count(m_method_ptr);                                                                                                               \
-		}                                                                                                                                                             \
-		static Variant::Type get_argument_type(int p_arg) {                                                                                                           \
-			return vc_get_argument_type(m_method_ptr, p_arg);                                                                                                         \
-		}                                                                                                                                                             \
-		static Variant::Type get_return_type() {                                                                                                                      \
-			return vc_get_return_type(m_method_ptr);                                                                                                                  \
-		}                                                                                                                                                             \
-		static bool has_return_type() {                                                                                                                               \
-			return true;                                                                                                                                              \
-		}                                                                                                                                                             \
-		static bool is_const() {                                                                                                                                      \
-			return true;                                                                                                                                              \
-		}                                                                                                                                                             \
-		static bool is_static() {                                                                                                                                     \
-			return false;                                                                                                                                             \
-		}                                                                                                                                                             \
-		static bool is_vararg() {                                                                                                                                     \
-			return false;                                                                                                                                             \
-		}                                                                                                                                                             \
-		static Variant::Type get_base_type() {                                                                                                                        \
-			return GetTypeInfo<m_class>::VARIANT_TYPE;                                                                                                                \
-		}                                                                                                                                                             \
-		static StringName get_name() {                                                                                                                                \
-			return #m_method_name;                                                                                                                                    \
-		}                                                                                                                                                             \
+template <class T, class... P>
+static _FORCE_INLINE_ void vc_ptrcall(void (*method)(T *, P...), void *p_base, const void **p_args, void *r_ret) {
+	call_with_ptr_args_static<T, P...>(reinterpret_cast<T *>(p_base), method, p_args);
+}
+
+#define FUNCTION_CLASS(m_class, m_method_name, m_method_ptr, m_const)                                                                                             \
+	struct Method_##m_class##_##m_method_name {                                                                                                                   \
+		static void call(Variant *base, const Variant **p_args, int p_argcount, Variant &r_ret, const Vector<Variant> &p_defvals, Callable::CallError &r_error) { \
+			vc_method_call_static(m_method_ptr, base, p_args, p_argcount, r_ret, p_defvals, r_error);                                                             \
+		}                                                                                                                                                         \
+		static void validated_call(Variant *base, const Variant **p_args, int p_argcount, Variant *r_ret) {                                                       \
+			vc_change_return_type(m_method_ptr, r_ret);                                                                                                           \
+			vc_validated_call_static(m_method_ptr, base, p_args, r_ret);                                                                                          \
+		}                                                                                                                                                         \
+		static void ptrcall(void *p_base, const void **p_args, void *r_ret, int p_argcount) {                                                                     \
+			vc_ptrcall(m_method_ptr, p_base, p_args, r_ret);                                                                                                      \
+		}                                                                                                                                                         \
+		static int get_argument_count() {                                                                                                                         \
+			return vc_get_argument_count(m_method_ptr);                                                                                                           \
+		}                                                                                                                                                         \
+		static Variant::Type get_argument_type(int p_arg) {                                                                                                       \
+			return vc_get_argument_type(m_method_ptr, p_arg);                                                                                                     \
+		}                                                                                                                                                         \
+		static Variant::Type get_return_type() {                                                                                                                  \
+			return vc_get_return_type(m_method_ptr);                                                                                                              \
+		}                                                                                                                                                         \
+		static bool has_return_type() {                                                                                                                           \
+			return vc_has_return_type_static(m_method_ptr);                                                                                                       \
+		}                                                                                                                                                         \
+		static bool is_const() {                                                                                                                                  \
+			return m_const;                                                                                                                                       \
+		}                                                                                                                                                         \
+		static bool is_static() {                                                                                                                                 \
+			return false;                                                                                                                                         \
+		}                                                                                                                                                         \
+		static bool is_vararg() {                                                                                                                                 \
+			return false;                                                                                                                                         \
+		}                                                                                                                                                         \
+		static Variant::Type get_base_type() {                                                                                                                    \
+			return GetTypeInfo<m_class>::VARIANT_TYPE;                                                                                                            \
+		}                                                                                                                                                         \
+		static StringName get_name() {                                                                                                                            \
+			return #m_method_name;                                                                                                                                \
+		}                                                                                                                                                         \
 	};
 
 #define VARARG_CLASS(m_class, m_method_name, m_method_ptr, m_has_return, m_return_type)                                                                           \
@@ -590,6 +626,195 @@ struct _VariantCall {
 		return s;
 	}
 
+	static int64_t func_PackedByteArray_decode_u8(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > int64_t(size) - 1, 0);
+		const uint8_t *r = p_instance->ptr();
+		return r[p_offset];
+	}
+	static int64_t func_PackedByteArray_decode_s8(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > int64_t(size) - 1, 0);
+		const uint8_t *r = p_instance->ptr();
+		return *((const int8_t *)&r[p_offset]);
+	}
+	static int64_t func_PackedByteArray_decode_u16(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > (int64_t(size) - 2), 0);
+		const uint8_t *r = p_instance->ptr();
+		return decode_uint16(&r[p_offset]);
+	}
+	static int64_t func_PackedByteArray_decode_s16(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > (int64_t(size) - 2), 0);
+		const uint8_t *r = p_instance->ptr();
+		return (int16_t)decode_uint16(&r[p_offset]);
+	}
+	static int64_t func_PackedByteArray_decode_u32(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > (int64_t(size) - 4), 0);
+		const uint8_t *r = p_instance->ptr();
+		return decode_uint32(&r[p_offset]);
+	}
+	static int64_t func_PackedByteArray_decode_s32(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > (int64_t(size) - 4), 0);
+		const uint8_t *r = p_instance->ptr();
+		return (int32_t)decode_uint32(&r[p_offset]);
+	}
+	static int64_t func_PackedByteArray_decode_u64(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > (int64_t(size) - 8), 0);
+		const uint8_t *r = p_instance->ptr();
+		return (int64_t)decode_uint64(&r[p_offset]);
+	}
+	static int64_t func_PackedByteArray_decode_s64(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > (int64_t(size) - 8), 0);
+		const uint8_t *r = p_instance->ptr();
+		return (int64_t)decode_uint64(&r[p_offset]);
+	}
+	static double func_PackedByteArray_decode_half(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > (int64_t(size) - 2), 0);
+		const uint8_t *r = p_instance->ptr();
+		return Math::half_to_float(decode_uint16(&r[p_offset]));
+	}
+	static double func_PackedByteArray_decode_float(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > (int64_t(size) - 4), 0);
+		const uint8_t *r = p_instance->ptr();
+		return decode_float(&r[p_offset]);
+	}
+
+	static double func_PackedByteArray_decode_double(PackedByteArray *p_instance, int64_t p_offset) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0 || p_offset > (int64_t(size) - 8), 0);
+		const uint8_t *r = p_instance->ptr();
+		return decode_double(&r[p_offset]);
+	}
+
+	static bool func_PackedByteArray_has_encoded_var(PackedByteArray *p_instance, int64_t p_offset, bool p_allow_objects) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0, false);
+		const uint8_t *r = p_instance->ptr();
+		Variant ret;
+		Error err = decode_variant(ret, r + p_offset, size - p_offset, nullptr, p_allow_objects);
+		return err == OK;
+	}
+
+	static Variant func_PackedByteArray_decode_var(PackedByteArray *p_instance, int64_t p_offset, bool p_allow_objects) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0, Variant());
+		const uint8_t *r = p_instance->ptr();
+		Variant ret;
+		Error err = decode_variant(ret, r + p_offset, size - p_offset, nullptr, p_allow_objects);
+		if (err != OK) {
+			ret = Variant();
+		}
+		return ret;
+	}
+
+	static int64_t func_PackedByteArray_decode_var_size(PackedByteArray *p_instance, int64_t p_offset, bool p_allow_objects) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0, 0);
+		const uint8_t *r = p_instance->ptr();
+		Variant ret;
+		int r_size;
+		Error err = decode_variant(ret, r + p_offset, size - p_offset, &r_size, p_allow_objects);
+		if (err == OK) {
+			return r_size;
+		}
+		return 0;
+	}
+
+	static void func_PackedByteArray_encode_u8(PackedByteArray *p_instance, int64_t p_offset, int64_t p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 1);
+		uint8_t *w = p_instance->ptrw();
+		*((uint8_t *)&w[p_offset]) = p_value;
+	}
+	static void func_PackedByteArray_encode_s8(PackedByteArray *p_instance, int64_t p_offset, int64_t p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 1);
+		uint8_t *w = p_instance->ptrw();
+		*((int8_t *)&w[p_offset]) = p_value;
+	}
+
+	static void func_PackedByteArray_encode_u16(PackedByteArray *p_instance, int64_t p_offset, int64_t p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 2);
+		uint8_t *w = p_instance->ptrw();
+		encode_uint16((uint16_t)p_value, &w[p_offset]);
+	}
+	static void func_PackedByteArray_encode_s16(PackedByteArray *p_instance, int64_t p_offset, int64_t p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 2);
+		uint8_t *w = p_instance->ptrw();
+		encode_uint16((int16_t)p_value, &w[p_offset]);
+	}
+
+	static void func_PackedByteArray_encode_u32(PackedByteArray *p_instance, int64_t p_offset, int64_t p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 4);
+		uint8_t *w = p_instance->ptrw();
+		encode_uint32((uint32_t)p_value, &w[p_offset]);
+	}
+	static void func_PackedByteArray_encode_s32(PackedByteArray *p_instance, int64_t p_offset, int64_t p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 4);
+		uint8_t *w = p_instance->ptrw();
+		encode_uint32((int32_t)p_value, &w[p_offset]);
+	}
+
+	static void func_PackedByteArray_encode_u64(PackedByteArray *p_instance, int64_t p_offset, int64_t p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 8);
+		uint8_t *w = p_instance->ptrw();
+		encode_uint64((uint64_t)p_value, &w[p_offset]);
+	}
+	static void func_PackedByteArray_encode_s64(PackedByteArray *p_instance, int64_t p_offset, int64_t p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 8);
+		uint8_t *w = p_instance->ptrw();
+		encode_uint64((int64_t)p_value, &w[p_offset]);
+	}
+
+	static void func_PackedByteArray_encode_half(PackedByteArray *p_instance, int64_t p_offset, double p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 2);
+		uint8_t *w = p_instance->ptrw();
+		encode_uint16(Math::make_half_float(p_value), &w[p_offset]);
+	}
+	static void func_PackedByteArray_encode_float(PackedByteArray *p_instance, int64_t p_offset, double p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 4);
+		uint8_t *w = p_instance->ptrw();
+		encode_float(p_value, &w[p_offset]);
+	}
+	static void func_PackedByteArray_encode_double(PackedByteArray *p_instance, int64_t p_offset, double p_value) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND(p_offset < 0 || p_offset > int64_t(size) - 8);
+		uint8_t *w = p_instance->ptrw();
+		encode_double(p_value, &w[p_offset]);
+	}
+	static int64_t func_PackedByteArray_encode_var(PackedByteArray *p_instance, int64_t p_offset, const Variant &p_value, bool p_allow_objects) {
+		uint64_t size = p_instance->size();
+		ERR_FAIL_COND_V(p_offset < 0, -1);
+		uint8_t *w = p_instance->ptrw();
+		int len;
+		Error err = encode_variant(p_value, nullptr, len, p_allow_objects);
+		if (err != OK) {
+			return -1;
+		}
+		if (uint64_t(p_offset + len) > size) {
+			return -1; // did not fit
+		}
+		encode_variant(p_value, w + p_offset, len, p_allow_objects);
+
+		return len;
+	}
+
 	static void func_Callable_call(Variant *v, const Variant **p_args, int p_argcount, Variant &r_ret, Callable::CallError &r_error) {
 		Callable *callable = VariantGetInternalPtr<Callable>::get_ptr(v);
 		callable->call(p_args, p_argcount, r_ret, r_error);
@@ -1005,11 +1230,21 @@ Variant Variant::get_constant_value(Variant::Type p_type, const StringName &p_va
 
 #ifdef DEBUG_METHODS_ENABLED
 #define bind_function(m_type, m_name, m_method, m_arg_names, m_default_args) \
-	FUNCTION_CLASS(m_type, m_name, m_method);                                \
+	FUNCTION_CLASS(m_type, m_name, m_method, true);                          \
 	register_builtin_method<Method_##m_type##_##m_name>(m_arg_names, m_default_args);
 #else
 #define bind_function(m_type, m_name, m_method, m_arg_names, m_default_args) \
-	FUNCTION_CLASS(m_type, m_name, m_method);                                \
+	FUNCTION_CLASS(m_type, m_name, m_method, true);                          \
+	register_builtin_method<Method_##m_type##_##m_name>(sarray(), m_default_args);
+#endif
+
+#ifdef DEBUG_METHODS_ENABLED
+#define bind_functionnc(m_type, m_name, m_method, m_arg_names, m_default_args) \
+	FUNCTION_CLASS(m_type, m_name, m_method, false);                           \
+	register_builtin_method<Method_##m_type##_##m_name>(m_arg_names, m_default_args);
+#else
+#define bind_functionnc(m_type, m_name, m_method, m_arg_names, m_default_args) \
+	FUNCTION_CLASS(m_type, m_name, m_method, false);                           \
 	register_builtin_method<Method_##m_type##_##m_name>(sarray(), m_default_args);
 #endif
 
@@ -1491,6 +1726,34 @@ static void _register_variant_builtin_methods() {
 	bind_function(PackedByteArray, decompress, _VariantCall::func_PackedByteArray_decompress, sarray("buffer_size", "compression_mode"), varray(0));
 	bind_function(PackedByteArray, decompress_dynamic, _VariantCall::func_PackedByteArray_decompress_dynamic, sarray("max_output_size", "compression_mode"), varray(0));
 
+	bind_function(PackedByteArray, decode_u8, _VariantCall::func_PackedByteArray_decode_u8, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_s8, _VariantCall::func_PackedByteArray_decode_s8, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_u16, _VariantCall::func_PackedByteArray_decode_u16, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_s16, _VariantCall::func_PackedByteArray_decode_s16, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_u32, _VariantCall::func_PackedByteArray_decode_u32, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_s32, _VariantCall::func_PackedByteArray_decode_s32, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_u64, _VariantCall::func_PackedByteArray_decode_u64, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_s64, _VariantCall::func_PackedByteArray_decode_s64, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_half, _VariantCall::func_PackedByteArray_decode_half, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_float, _VariantCall::func_PackedByteArray_decode_float, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, decode_double, _VariantCall::func_PackedByteArray_decode_double, sarray("byte_offset"), varray());
+	bind_function(PackedByteArray, has_encoded_var, _VariantCall::func_PackedByteArray_has_encoded_var, sarray("byte_offset", "allow_objects"), varray(false));
+	bind_function(PackedByteArray, decode_var, _VariantCall::func_PackedByteArray_decode_var, sarray("byte_offset", "allow_objects"), varray(false));
+	bind_function(PackedByteArray, decode_var_size, _VariantCall::func_PackedByteArray_decode_var_size, sarray("byte_offset", "allow_objects"), varray(false));
+
+	bind_functionnc(PackedByteArray, encode_u8, _VariantCall::func_PackedByteArray_encode_u8, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_s8, _VariantCall::func_PackedByteArray_encode_s8, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_u16, _VariantCall::func_PackedByteArray_encode_u16, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_s16, _VariantCall::func_PackedByteArray_encode_s16, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_u32, _VariantCall::func_PackedByteArray_encode_u32, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_s32, _VariantCall::func_PackedByteArray_encode_s32, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_u64, _VariantCall::func_PackedByteArray_encode_u64, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_s64, _VariantCall::func_PackedByteArray_encode_s64, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_half, _VariantCall::func_PackedByteArray_encode_half, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_float, _VariantCall::func_PackedByteArray_encode_float, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_double, _VariantCall::func_PackedByteArray_encode_double, sarray("byte_offset", "value"), varray());
+	bind_functionnc(PackedByteArray, encode_var, _VariantCall::func_PackedByteArray_encode_var, sarray("byte_offset", "value", "allow_objects"), varray(false));
+
 	/* Int32 Array */
 
 	bind_method(PackedInt32Array, size, sarray(), varray());
diff --git a/core/variant/variant_setget.cpp b/core/variant/variant_setget.cpp
index f319631ce5..9ab8602782 100644
--- a/core/variant/variant_setget.cpp
+++ b/core/variant/variant_setget.cpp
@@ -1045,6 +1045,7 @@ void register_indexed_setters_getters() {
 	REGISTER_INDEXED_MEMBER(PackedByteArray);
 	REGISTER_INDEXED_MEMBER(PackedInt32Array);
 	REGISTER_INDEXED_MEMBER(PackedInt64Array);
+	REGISTER_INDEXED_MEMBER(PackedFloat32Array);
 	REGISTER_INDEXED_MEMBER(PackedFloat64Array);
 	REGISTER_INDEXED_MEMBER(PackedVector2Array);
 	REGISTER_INDEXED_MEMBER(PackedVector3Array);
diff --git a/doc/classes/CapsuleMesh.xml b/doc/classes/CapsuleMesh.xml
index fab11d44cc..031abd0112 100644
--- a/doc/classes/CapsuleMesh.xml
+++ b/doc/classes/CapsuleMesh.xml
@@ -12,7 +12,8 @@
 	</methods>
 	<members>
 		<member name="mid_height" type="float" setter="set_mid_height" getter="get_mid_height" default="1.0">
-			Height of the capsule mesh from the center point.
+			Height of the middle cylindrical part of the capsule (without the hemispherical ends).
+			[b]Note:[/b] The capsule's total height is equal to [member mid_height] + 2 * [member radius].
 		</member>
 		<member name="radial_segments" type="int" setter="set_radial_segments" getter="get_radial_segments" default="64">
 			Number of radial segments on the capsule mesh.
diff --git a/doc/classes/ColorPicker.xml b/doc/classes/ColorPicker.xml
index 2fc4313e47..83223bb645 100644
--- a/doc/classes/ColorPicker.xml
+++ b/doc/classes/ColorPicker.xml
@@ -51,6 +51,9 @@
 			If [code]true[/code], allows editing the color with Hue/Saturation/Value sliders.
 			[b]Note:[/b] Cannot be enabled if raw mode is on.
 		</member>
+		<member name="picker_shape" type="int" setter="set_picker_shape" getter="get_picker_shape" default="0">
+			The shape of the color space view. See [enum PickerShapeType].
+		</member>
 		<member name="presets_enabled" type="bool" setter="set_presets_enabled" getter="are_presets_enabled" default="true">
 			If [code]true[/code], the "add preset" button is enabled.
 		</member>
@@ -86,6 +89,15 @@
 		</signal>
 	</signals>
 	<constants>
+		<constant name="SHAPE_HSV_RECTANGLE" value="0" enum="PickerShapeType">
+			HSV Color Model rectangle color space.
+		</constant>
+		<constant name="SHAPE_HSV_WHEEL" value="1" enum="PickerShapeType">
+			HSV Color Model rectangle color space with a wheel.
+		</constant>
+		<constant name="SHAPE_VHS_CIRCLE" value="2" enum="PickerShapeType">
+			HSV Color Model circle color space. Use Saturation as a radius.
+		</constant>
 	</constants>
 	<theme_items>
 		<theme_item name="add_preset" type="Texture2D">
diff --git a/doc/classes/EditorInterface.xml b/doc/classes/EditorInterface.xml
index 4d0e11fb19..a5328ce382 100644
--- a/doc/classes/EditorInterface.xml
+++ b/doc/classes/EditorInterface.xml
@@ -10,6 +10,15 @@
 	<tutorials>
 	</tutorials>
 	<methods>
+		<method name="edit_node">
+			<return type="void">
+			</return>
+			<argument index="0" name="node" type="Node">
+			</argument>
+			<description>
+				Edits the given [Node]. The node will be also selected if it's inside the scene tree.
+			</description>
+		</method>
 		<method name="edit_resource">
 			<return type="void">
 			</return>
diff --git a/doc/classes/EditorPlugin.xml b/doc/classes/EditorPlugin.xml
index 8dcffb0b74..61f1761249 100644
--- a/doc/classes/EditorPlugin.xml
+++ b/doc/classes/EditorPlugin.xml
@@ -214,7 +214,7 @@
 				[gdscript]
 				func forward_canvas_draw_over_viewport(overlay):
 				    # Draw a circle at cursor position.
-				    overlay.draw_circle(overlay.get_local_mouse_position(), 64)
+				    overlay.draw_circle(overlay.get_local_mouse_position(), 64, Color.white)
 
 				func forward_canvas_gui_input(event):
 				    if event is InputEventMouseMotion:
diff --git a/doc/classes/EditorSelection.xml b/doc/classes/EditorSelection.xml
index 1ff9744b70..63e89750c3 100644
--- a/doc/classes/EditorSelection.xml
+++ b/doc/classes/EditorSelection.xml
@@ -17,6 +17,7 @@
 			</argument>
 			<description>
 				Adds a node to the selection.
+				[b]Note:[/b] The newly selected node will not be automatically edited in the inspector. If you want to edit a node, use [method EditorInterface.edit_node].
 			</description>
 		</method>
 		<method name="clear">
diff --git a/doc/classes/Node.xml b/doc/classes/Node.xml
index 7750d45226..b5335e47cd 100644
--- a/doc/classes/Node.xml
+++ b/doc/classes/Node.xml
@@ -37,13 +37,13 @@
 				Corresponds to the [constant NOTIFICATION_EXIT_TREE] notification in [method Object._notification] and signal [signal tree_exiting]. To get notified when the node has already left the active tree, connect to the [signal tree_exited].
 			</description>
 		</method>
-		<method name="_get_configuration_warning" qualifiers="virtual">
-			<return type="String">
+		<method name="_get_configuration_warnings" qualifiers="virtual">
+			<return type="Array">
 			</return>
 			<description>
-				The string returned from this method is displayed as a warning in the Scene Dock if the script that overrides it is a [code]tool[/code] script.
-				Returning an empty string produces no warning.
-				Call [method update_configuration_warning] when the warning needs to be updated for this node.
+				The elements in the array returned from this method are displayed as warnings in the Scene Dock if the script that overrides it is a [code]tool[/code] script.
+				Returning an empty array produces no warnings.
+				Call [method update_configuration_warnings] when the warnings need to be updated for this node.
 			</description>
 		</method>
 		<method name="_input" qualifiers="virtual">
@@ -856,12 +856,12 @@
 				Sets whether this is an instance load placeholder. See [InstancePlaceholder].
 			</description>
 		</method>
-		<method name="update_configuration_warning">
+		<method name="update_configuration_warnings">
 			<return type="void">
 			</return>
 			<description>
 				Updates the warning displayed for this node in the Scene Dock.
-				Use [method _get_configuration_warning] to setup the warning message to display.
+				Use [method _get_configuration_warnings] to setup the warning message to display.
 			</description>
 		</method>
 	</methods>
diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml
index 5b9150ab04..5cac715408 100644
--- a/doc/classes/ProjectSettings.xml
+++ b/doc/classes/ProjectSettings.xml
@@ -6,7 +6,7 @@
 	<description>
 		Contains global variables accessible from everywhere. Use [method get_setting], [method set_setting] or [method has_setting] to access them. Variables stored in [code]project.godot[/code] are also loaded into ProjectSettings, making this object very useful for reading custom game configuration options.
 		When naming a Project Settings property, use the full path to the setting including the category. For example, [code]"application/config/name"[/code] for the project name. Category and property names can be viewed in the Project Settings dialog.
-		[b]Overriding:[/b] Any project setting can be overridden by creating a file named [code]override.cfg[/code] in the project's root directory. This can also be used in exported projects by placing this file in the same directory as the project binary.
+		[b]Overriding:[/b] Any project setting can be overridden by creating a file named [code]override.cfg[/code] in the project's root directory. This can also be used in exported projects by placing this file in the same directory as the project binary. Overriding will still take the base project settings' [url=https://docs.godotengine.org/en/latest/tutorials/export/feature_tags.html]feature tags[/url] in account. Therefore, make sure to [i]also[/i] override the setting with the desired feature tags if you want them to override base project settings on all platforms and configurations.
 	</description>
 	<tutorials>
 		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
@@ -255,8 +255,8 @@
 			[b]Note:[/b] Changing this value will also change the user data folder's path if [member application/config/use_custom_user_dir] is [code]false[/code]. After renaming the project, you will no longer be able to access existing data in [code]user://[/code] unless you rename the old folder to match the new project name. See [url=https://docs.godotengine.org/en/latest/tutorials/io/data_paths.html]Data paths[/url] in the documentation for more information.
 		</member>
 		<member name="application/config/project_settings_override" type="String" setter="" getter="" default="&quot;&quot;">
-			Specifies a file to override project settings. For example: [code]user://custom_settings.cfg[/code].
-			[b]Note:[/b] Regardless of this setting's value, [code]res://override.cfg[/code] will still be read to override the project settings (see this class' description at the top).
+			Specifies a file to override project settings. For example: [code]user://custom_settings.cfg[/code]. See "Overriding" in the [ProjectSettings] class description at the top for more information.
+			[b]Note:[/b] Regardless of this setting's value, [code]res://override.cfg[/code] will still be read to override the project settings.
 		</member>
 		<member name="application/config/use_custom_user_dir" type="bool" setter="" getter="" default="false">
 			If [code]true[/code], the project will save user data to its own user directory (see [member application/config/custom_user_dir_name]). This setting is only effective on desktop platforms. A name must be set in the [member application/config/custom_user_dir_name] setting for this to take effect. If [code]false[/code], the project will save user data to [code](OS user data directory)/Godot/app_userdata/(project name)[/code].
diff --git a/doc/tools/makerst.py b/doc/tools/makerst.py
index ae3cc73098..1c6055f8ca 100755
--- a/doc/tools/makerst.py
+++ b/doc/tools/makerst.py
@@ -437,7 +437,7 @@ def make_rst_class(class_def, state, dry_run, output_dir):  # type: (ClassDef, S
         for property_def in class_def.properties.values():
             type_rst = property_def.type_name.to_rst(state)
             default = property_def.default_value
-            if property_def.overridden:
+            if default is not None and property_def.overridden:
                 ml.append((type_rst, property_def.name, default + " *(parent override)*"))
             else:
                 ref = ":ref:`{0}<class_{1}_property_{0}>`".format(property_def.name, class_name)
diff --git a/editor/code_editor.cpp b/editor/code_editor.cpp
index ac8bef817b..6ed2cb9d9c 100644
--- a/editor/code_editor.cpp
+++ b/editor/code_editor.cpp
@@ -142,7 +142,7 @@ bool FindReplaceBar::_search(uint32_t p_flags, int p_from_line, int p_from_col)
 	bool found = text_editor->search(text, p_flags, p_from_line, p_from_col, line, col);
 
 	if (found) {
-		if (!preserve_cursor) {
+		if (!preserve_cursor && !is_selection_only()) {
 			text_editor->unfold_line(line);
 			text_editor->cursor_set_line(line, false);
 			text_editor->cursor_set_column(col + text.length(), false);
diff --git a/editor/debugger/editor_profiler.cpp b/editor/debugger/editor_profiler.cpp
index c4290b7cca..6befee090b 100644
--- a/editor/debugger/editor_profiler.cpp
+++ b/editor/debugger/editor_profiler.cpp
@@ -43,28 +43,34 @@ void EditorProfiler::_make_metric_ptrs(Metric &m) {
 	}
 }
 
+EditorProfiler::Metric EditorProfiler::_get_frame_metric(int index) {
+	return frame_metrics[(frame_metrics.size() + last_metric - (total_metrics - 1) + index) % frame_metrics.size()];
+}
+
 void EditorProfiler::add_frame_metric(const Metric &p_metric, bool p_final) {
 	++last_metric;
 	if (last_metric >= frame_metrics.size()) {
 		last_metric = 0;
 	}
 
+	total_metrics++;
+	if (total_metrics > frame_metrics.size()) {
+		total_metrics = frame_metrics.size();
+	}
+
 	frame_metrics.write[last_metric] = p_metric;
 	_make_metric_ptrs(frame_metrics.write[last_metric]);
 
 	updating_frame = true;
-	cursor_metric_edit->set_max(frame_metrics[last_metric].frame_number);
-	cursor_metric_edit->set_min(MAX(frame_metrics[last_metric].frame_number - frame_metrics.size(), 0));
+	clear_button->set_disabled(false);
+	cursor_metric_edit->set_editable(true);
+	cursor_metric_edit->set_max(p_metric.frame_number);
+	cursor_metric_edit->set_min(_get_frame_metric(0).frame_number);
 
 	if (!seeking) {
-		cursor_metric_edit->set_value(frame_metrics[last_metric].frame_number);
-		if (hover_metric != -1) {
-			hover_metric++;
-			if (hover_metric >= frame_metrics.size()) {
-				hover_metric = 0;
-			}
-		}
+		cursor_metric_edit->set_value(p_metric.frame_number);
 	}
+
 	updating_frame = false;
 
 	if (frame_delay->is_stopped()) {
@@ -83,6 +89,7 @@ void EditorProfiler::clear() {
 	metric_size = CLAMP(metric_size, 60, 1024);
 	frame_metrics.clear();
 	frame_metrics.resize(metric_size);
+	total_metrics = 0;
 	last_metric = -1;
 	variables->clear();
 	plot_sigs.clear();
@@ -93,6 +100,7 @@ void EditorProfiler::clear() {
 	cursor_metric_edit->set_min(0);
 	cursor_metric_edit->set_max(100); // Doesn't make much sense, but we can't have min == max. Doesn't hurt.
 	cursor_metric_edit->set_value(0);
+	cursor_metric_edit->set_editable(false);
 	updating_frame = false;
 	hover_metric = -1;
 	seeking = false;
@@ -187,11 +195,8 @@ void EditorProfiler::_update_plot() {
 	const bool use_self = display_time->get_selected() == DISPLAY_SELF_TIME;
 	float highest = 0;
 
-	for (int i = 0; i < frame_metrics.size(); i++) {
-		const Metric &m = frame_metrics[i];
-		if (!m.valid) {
-			continue;
-		}
+	for (int i = 0; i < total_metrics; i++) {
+		const Metric &m = _get_frame_metric(i);
 
 		for (Set<StringName>::Element *E = plot_sigs.front(); E; E = E->next()) {
 			const Map<StringName, Metric::Category *>::Element *F = m.category_ptrs.find(E->get());
@@ -220,78 +225,43 @@ void EditorProfiler::_update_plot() {
 
 		int *column = columnv.ptrw();
 
-		Map<StringName, int> plot_prev;
-		//Map<StringName,int> plot_max;
+		Map<StringName, int> prev_plots;
 
-		for (int i = 0; i < w; i++) {
+		for (int i = 0; i < total_metrics * w / frame_metrics.size() - 1; i++) {
 			for (int j = 0; j < h * 4; j++) {
 				column[j] = 0;
 			}
 
 			int current = i * frame_metrics.size() / w;
-			int next = (i + 1) * frame_metrics.size() / w;
-			if (next > frame_metrics.size()) {
-				next = frame_metrics.size();
-			}
-			if (next == current) {
-				next = current + 1; //just because for loop must work
-			}
 
 			for (Set<StringName>::Element *E = plot_sigs.front(); E; E = E->next()) {
-				int plot_pos = -1;
+				const Metric &m = _get_frame_metric(current);
 
-				for (int j = current; j < next; j++) {
-					//wrap
-					int idx = last_metric + 1 + j;
-					while (idx >= frame_metrics.size()) {
-						idx -= frame_metrics.size();
-					}
-
-					//get
-					const Metric &m = frame_metrics[idx];
-					if (!m.valid) {
-						continue; //skip because invalid
-					}
+				float value = 0;
 
-					float value = 0;
-
-					const Map<StringName, Metric::Category *>::Element *F = m.category_ptrs.find(E->get());
-					if (F) {
-						value = F->get()->total_time;
-					}
+				const Map<StringName, Metric::Category *>::Element *F = m.category_ptrs.find(E->get());
+				if (F) {
+					value = F->get()->total_time;
+				}
 
-					const Map<StringName, Metric::Category::Item *>::Element *G = m.item_ptrs.find(E->get());
-					if (G) {
-						if (use_self) {
-							value = G->get()->self;
-						} else {
-							value = G->get()->total;
-						}
+				const Map<StringName, Metric::Category::Item *>::Element *G = m.item_ptrs.find(E->get());
+				if (G) {
+					if (use_self) {
+						value = G->get()->self;
+					} else {
+						value = G->get()->total;
 					}
-
-					plot_pos = MAX(CLAMP(int(value * h / highest), 0, h - 1), plot_pos);
 				}
 
+				int plot_pos = CLAMP(int(value * h / highest), 0, h - 1);
+
 				int prev_plot = plot_pos;
-				Map<StringName, int>::Element *H = plot_prev.find(E->get());
+				Map<StringName, int>::Element *H = prev_plots.find(E->get());
 				if (H) {
 					prev_plot = H->get();
 					H->get() = plot_pos;
 				} else {
-					plot_prev[E->get()] = plot_pos;
-				}
-
-				if (plot_pos == -1 && prev_plot == -1) {
-					//don't bother drawing
-					continue;
-				}
-
-				if (prev_plot != -1 && plot_pos == -1) {
-					plot_pos = prev_plot;
-				}
-
-				if (prev_plot == -1 && plot_pos != -1) {
-					prev_plot = plot_pos;
+					prev_plots[E->get()] = plot_pos;
 				}
 
 				plot_pos = h - plot_pos - 1;
@@ -352,15 +322,13 @@ void EditorProfiler::_update_plot() {
 }
 
 void EditorProfiler::_update_frame() {
-	int cursor_metric = _get_cursor_index();
-
-	ERR_FAIL_INDEX(cursor_metric, frame_metrics.size());
+	int cursor_metric = cursor_metric_edit->get_value() - _get_frame_metric(0).frame_number;
 
 	updating_frame = true;
 	variables->clear();
 
 	TreeItem *root = variables->create_item();
-	const Metric &m = frame_metrics[cursor_metric];
+	const Metric &m = _get_frame_metric(cursor_metric);
 
 	int dtime = display_time->get_selected();
 
@@ -410,6 +378,7 @@ void EditorProfiler::_activate_pressed() {
 	if (activate->is_pressed()) {
 		activate->set_icon(get_theme_icon("Stop", "EditorIcons"));
 		activate->set_text(TTR("Stop"));
+		_clear_pressed();
 	} else {
 		activate->set_icon(get_theme_icon("Play", "EditorIcons"));
 		activate->set_text(TTR("Start"));
@@ -418,6 +387,7 @@ void EditorProfiler::_activate_pressed() {
 }
 
 void EditorProfiler::_clear_pressed() {
+	clear_button->set_disabled(true);
 	clear();
 	_update_plot();
 }
@@ -430,30 +400,16 @@ void EditorProfiler::_notification(int p_what) {
 }
 
 void EditorProfiler::_graph_tex_draw() {
-	if (last_metric < 0) {
+	if (total_metrics == 0) {
 		return;
 	}
 	if (seeking) {
-		int max_frames = frame_metrics.size();
-		int frame = cursor_metric_edit->get_value() - (frame_metrics[last_metric].frame_number - max_frames + 1);
-		if (frame < 0) {
-			frame = 0;
-		}
-
-		int cur_x = frame * graph->get_size().x / max_frames;
-
+		int frame = cursor_metric_edit->get_value() - _get_frame_metric(0).frame_number;
+		int cur_x = (2 * frame + 1) * graph->get_size().x / (2 * frame_metrics.size()) + 1;
 		graph->draw_line(Vector2(cur_x, 0), Vector2(cur_x, graph->get_size().y), Color(1, 1, 1, 0.8));
 	}
-
-	if (hover_metric != -1 && frame_metrics[hover_metric].valid) {
-		int max_frames = frame_metrics.size();
-		int frame = frame_metrics[hover_metric].frame_number - (frame_metrics[last_metric].frame_number - max_frames + 1);
-		if (frame < 0) {
-			frame = 0;
-		}
-
-		int cur_x = frame * graph->get_size().x / max_frames;
-
+	if (hover_metric > -1 && hover_metric < total_metrics) {
+		int cur_x = (2 * hover_metric + 1) * graph->get_size().x / (2 * frame_metrics.size()) + 1;
 		graph->draw_line(Vector2(cur_x, 0), Vector2(cur_x, graph->get_size().y), Color(1, 1, 1, 0.4));
 	}
 }
@@ -484,10 +440,10 @@ void EditorProfiler::_graph_tex_input(const Ref<InputEvent> &p_ev) {
 	if (
 			(mb.is_valid() && mb->get_button_index() == MOUSE_BUTTON_LEFT && mb->is_pressed()) ||
 			(mm.is_valid())) {
-		int x = me->get_position().x;
+		int x = me->get_position().x - 1;
 		x = x * frame_metrics.size() / graph->get_size().width;
 
-		bool show_hover = x >= 0 && x < frame_metrics.size();
+		hover_metric = x;
 
 		if (x < 0) {
 			x = 0;
@@ -497,41 +453,11 @@ void EditorProfiler::_graph_tex_input(const Ref<InputEvent> &p_ev) {
 			x = frame_metrics.size() - 1;
 		}
 
-		int metric = frame_metrics.size() - x - 1;
-		metric = last_metric - metric;
-		while (metric < 0) {
-			metric += frame_metrics.size();
-		}
-
-		if (show_hover) {
-			hover_metric = metric;
-
-		} else {
-			hover_metric = -1;
-		}
-
 		if (mb.is_valid() || mm->get_button_mask() & MOUSE_BUTTON_MASK_LEFT) {
-			//cursor_metric=x;
 			updating_frame = true;
 
-			//metric may be invalid, so look for closest metric that is valid, this makes snap feel better
-			bool valid = false;
-			for (int i = 0; i < frame_metrics.size(); i++) {
-				if (frame_metrics[metric].valid) {
-					valid = true;
-					break;
-				}
-
-				metric++;
-				if (metric >= frame_metrics.size()) {
-					metric = 0;
-				}
-			}
-
-			if (valid) {
-				cursor_metric_edit->set_value(frame_metrics[metric].frame_number);
-			}
-
+			if (x < total_metrics)
+				cursor_metric_edit->set_value(_get_frame_metric(x).frame_number);
 			updating_frame = false;
 
 			if (activate->is_pressed()) {
@@ -552,24 +478,6 @@ void EditorProfiler::_graph_tex_input(const Ref<InputEvent> &p_ev) {
 	}
 }
 
-int EditorProfiler::_get_cursor_index() const {
-	if (last_metric < 0) {
-		return 0;
-	}
-	if (!frame_metrics[last_metric].valid) {
-		return 0;
-	}
-
-	int diff = (frame_metrics[last_metric].frame_number - cursor_metric_edit->get_value());
-
-	int idx = last_metric - diff;
-	while (idx < 0) {
-		idx += frame_metrics.size();
-	}
-
-	return idx;
-}
-
 void EditorProfiler::disable_seeking() {
 	seeking = false;
 	graph->update();
@@ -659,6 +567,7 @@ EditorProfiler::EditorProfiler() {
 	clear_button = memnew(Button);
 	clear_button->set_text(TTR("Clear"));
 	clear_button->connect("pressed", callable_mp(this, &EditorProfiler::_clear_pressed));
+	clear_button->set_disabled(true);
 	hb->add_child(clear_button);
 
 	hb->add_child(memnew(Label(TTR("Measure:"))));
@@ -687,6 +596,8 @@ EditorProfiler::EditorProfiler() {
 
 	cursor_metric_edit = memnew(SpinBox);
 	cursor_metric_edit->set_h_size_flags(SIZE_FILL);
+	cursor_metric_edit->set_value(0);
+	cursor_metric_edit->set_editable(false);
 	hb->add_child(cursor_metric_edit);
 	cursor_metric_edit->connect("value_changed", callable_mp(this, &EditorProfiler::_cursor_metric_changed));
 
@@ -726,6 +637,7 @@ EditorProfiler::EditorProfiler() {
 
 	int metric_size = CLAMP(int(EDITOR_DEF("debugger/profiler_frame_history_size", 600)), 60, 1024);
 	frame_metrics.resize(metric_size);
+	total_metrics = 0;
 	last_metric = -1;
 	hover_metric = -1;
 
diff --git a/editor/debugger/editor_profiler.h b/editor/debugger/editor_profiler.h
index e16bde41f6..8880824b87 100644
--- a/editor/debugger/editor_profiler.h
+++ b/editor/debugger/editor_profiler.h
@@ -106,13 +106,13 @@ private:
 	SpinBox *cursor_metric_edit;
 
 	Vector<Metric> frame_metrics;
+	int total_metrics;
 	int last_metric;
 
 	int max_functions;
 
 	bool updating_frame;
 
-	//int cursor_metric;
 	int hover_metric;
 
 	float graph_height;
@@ -139,14 +139,14 @@ private:
 	void _graph_tex_draw();
 	void _graph_tex_input(const Ref<InputEvent> &p_ev);
 
-	int _get_cursor_index() const;
-
 	Color _get_color_from_signature(const StringName &p_signature) const;
 
 	void _cursor_metric_changed(double);
 
 	void _combo_changed(int);
 
+	Metric _get_frame_metric(int index);
+
 protected:
 	void _notification(int p_what);
 	static void _bind_methods();
diff --git a/editor/editor_help_search.cpp b/editor/editor_help_search.cpp
index a1ff87fe2e..23226ffa9b 100644
--- a/editor/editor_help_search.cpp
+++ b/editor/editor_help_search.cpp
@@ -123,7 +123,7 @@ void EditorHelpSearch::_notification(int p_what) {
 				if (search->work()) {
 					// Search done.
 
-					// Only point to the perfect match if it's a new search, and not just reopening a old one.
+					// Only point to the match if it's a new search, and not just reopening a old one.
 					if (!old_search) {
 						results_tree->ensure_cursor_is_visible();
 					} else {
@@ -310,6 +310,7 @@ bool EditorHelpSearch::Runner::_phase_match_classes_init() {
 	iterator_doc = EditorHelp::get_doc_data()->class_list.front();
 	matches.clear();
 	matched_item = nullptr;
+	match_highest_score = 0;
 
 	return true;
 }
@@ -460,16 +461,20 @@ bool EditorHelpSearch::Runner::_match_string(const String &p_term, const String
 }
 
 void EditorHelpSearch::Runner::_match_item(TreeItem *p_item, const String &p_text) {
-	if (!matched_item) {
-		if (search_flags & SEARCH_CASE_SENSITIVE) {
-			if (p_text.casecmp_to(term) == 0) {
-				matched_item = p_item;
-			}
-		} else {
-			if (p_text.nocasecmp_to(term) == 0) {
-				matched_item = p_item;
-			}
-		}
+	float inverse_length = 1.f / float(p_text.length());
+
+	// Favor types where search term is a substring close to the start of the type.
+	float w = 0.5f;
+	int pos = p_text.findn(term);
+	float score = (pos > -1) ? 1.0f - w * MIN(1, 3 * pos * inverse_length) : MAX(0.f, .9f - w);
+
+	// Favor shorter items: they resemble the search term more.
+	w = 0.1f;
+	score *= (1 - w) + w * (term.length() * inverse_length);
+
+	if (match_highest_score == 0 || score > match_highest_score) {
+		matched_item = p_item;
+		match_highest_score = score;
 	}
 }
 
diff --git a/editor/editor_help_search.h b/editor/editor_help_search.h
index 0e236d523d..350a02315f 100644
--- a/editor/editor_help_search.h
+++ b/editor/editor_help_search.h
@@ -124,6 +124,7 @@ class EditorHelpSearch::Runner : public Reference {
 	TreeItem *root_item = nullptr;
 	Map<String, TreeItem *> class_items;
 	TreeItem *matched_item = nullptr;
+	float match_highest_score = 0;
 
 	bool _is_class_disabled_by_feature_profile(const StringName &p_class);
 
diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp
index 9ca46cfcc0..055baeb81e 100644
--- a/editor/editor_node.cpp
+++ b/editor/editor_node.cpp
@@ -5892,6 +5892,8 @@ EditorNode::EditorNode() {
 	EDITOR_DEF("interface/inspector/resources_to_open_in_new_inspector", "Script,MeshLibrary,TileSet");
 	EDITOR_DEF("interface/inspector/default_color_picker_mode", 0);
 	EditorSettings::get_singleton()->add_property_hint(PropertyInfo(Variant::INT, "interface/inspector/default_color_picker_mode", PROPERTY_HINT_ENUM, "RGB,HSV,RAW", PROPERTY_USAGE_DEFAULT));
+	EDITOR_DEF("interface/inspector/default_color_picker_shape", (int32_t)ColorPicker::SHAPE_VHS_CIRCLE);
+	EditorSettings::get_singleton()->add_property_hint(PropertyInfo(Variant::INT, "interface/inspector/default_color_picker_shape", PROPERTY_HINT_ENUM, "HSV Rectangle,HSV Rectangle Wheel,VHS Circle", PROPERTY_USAGE_DEFAULT));
 	EDITOR_DEF("run/auto_save/save_before_running", true);
 
 	theme_base = memnew(Control);
diff --git a/editor/editor_plugin.cpp b/editor/editor_plugin.cpp
index 064271fce8..eabcbacd9a 100644
--- a/editor/editor_plugin.cpp
+++ b/editor/editor_plugin.cpp
@@ -160,6 +160,10 @@ void EditorInterface::edit_resource(const Ref<Resource> &p_resource) {
 	EditorNode::get_singleton()->edit_resource(p_resource);
 }
 
+void EditorInterface::edit_node(Node *p_node) {
+	EditorNode::get_singleton()->edit_node(p_node);
+}
+
 void EditorInterface::open_scene_from_path(const String &scene_path) {
 	if (EditorNode::get_singleton()->is_changing_scene()) {
 		return;
@@ -312,6 +316,7 @@ void EditorInterface::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_base_control"), &EditorInterface::get_base_control);
 	ClassDB::bind_method(D_METHOD("get_editor_scale"), &EditorInterface::get_editor_scale);
 	ClassDB::bind_method(D_METHOD("edit_resource", "resource"), &EditorInterface::edit_resource);
+	ClassDB::bind_method(D_METHOD("edit_node", "node"), &EditorInterface::edit_node);
 	ClassDB::bind_method(D_METHOD("open_scene_from_path", "scene_filepath"), &EditorInterface::open_scene_from_path);
 	ClassDB::bind_method(D_METHOD("reload_scene_from_path", "scene_filepath"), &EditorInterface::reload_scene_from_path);
 	ClassDB::bind_method(D_METHOD("play_main_scene"), &EditorInterface::play_main_scene);
diff --git a/editor/editor_plugin.h b/editor/editor_plugin.h
index b0713c641b..67b163eabf 100644
--- a/editor/editor_plugin.h
+++ b/editor/editor_plugin.h
@@ -71,6 +71,7 @@ public:
 
 	Control *get_editor_main_control();
 	void edit_resource(const Ref<Resource> &p_resource);
+	void edit_node(Node *p_node);
 	void open_scene_from_path(const String &scene_path);
 	void reload_scene_from_path(const String &scene_path);
 
diff --git a/editor/editor_properties.cpp b/editor/editor_properties.cpp
index fa44239e32..4ea993af5b 100644
--- a/editor/editor_properties.cpp
+++ b/editor/editor_properties.cpp
@@ -2190,6 +2190,9 @@ void EditorPropertyColor::_picker_created() {
 	} else if (default_color_mode == 2) {
 		picker->get_picker()->set_raw_mode(true);
 	}
+
+	int picker_shape = EDITOR_GET("interface/inspector/default_color_picker_shape");
+	picker->get_picker()->set_picker_shape((ColorPicker::PickerShapeType)picker_shape);
 }
 
 void EditorPropertyColor::_picker_opening() {
diff --git a/editor/editor_themes.cpp b/editor/editor_themes.cpp
index 35cf330714..4c5c3af765 100644
--- a/editor/editor_themes.cpp
+++ b/editor/editor_themes.cpp
@@ -1282,6 +1282,7 @@ Ref<Theme> create_editor_theme(const Ref<Theme> p_theme) {
 	theme->set_icon("preset_bg", "ColorPicker", theme->get_icon("GuiMiniCheckerboard", "EditorIcons"));
 	theme->set_icon("overbright_indicator", "ColorPicker", theme->get_icon("OverbrightIndicator", "EditorIcons"));
 	theme->set_icon("bar_arrow", "ColorPicker", theme->get_icon("ColorPickerBarArrow", "EditorIcons"));
+	theme->set_icon("picker_cursor", "ColorPicker", theme->get_icon("PickerCursor", "EditorIcons"));
 
 	theme->set_icon("bg", "ColorPickerButton", theme->get_icon("GuiMiniCheckerboard", "EditorIcons"));
 
diff --git a/editor/icons/PickerCursor.svg b/editor/icons/PickerCursor.svg
new file mode 100644
index 0000000000..88ee3f55ce
--- /dev/null
+++ b/editor/icons/PickerCursor.svg
@@ -0,0 +1 @@
+<svg height="16" viewBox="0 0 16 16" width="16" xmlns="http://www.w3.org/2000/svg"><path d="m8 2a6 6 0 0 0 -6 6 6 6 0 0 0 6 6 6 6 0 0 0 6-6 6 6 0 0 0 -6-6zm0 1a5 5 0 0 1 5 5 5 5 0 0 1 -5 5 5 5 0 0 1 -5-5 5 5 0 0 1 5-5z" fill="#fff"/><path d="m8 3a5 5 0 0 0 -5 5 5 5 0 0 0 5 5 5 5 0 0 0 5-5 5 5 0 0 0 -5-5zm-.0605469 1a4 4 0 0 1 .0605469 0 4 4 0 0 1 4 4 4 4 0 0 1 -4 4 4 4 0 0 1 -4-4 4 4 0 0 1 3.9394531-4z"/></svg>
diff --git a/editor/plugins/node_3d_editor_plugin.cpp b/editor/plugins/node_3d_editor_plugin.cpp
index cbe0133034..3df092bc13 100644
--- a/editor/plugins/node_3d_editor_plugin.cpp
+++ b/editor/plugins/node_3d_editor_plugin.cpp
@@ -1279,7 +1279,7 @@ void Node3DEditorViewport::_sinput(const Ref<InputEvent> &p_event) {
 					clicked = ObjectID();
 					clicked_includes_current = false;
 
-					if ((spatial_editor->get_tool_mode() == Node3DEditor::TOOL_MODE_SELECT && b->get_control()) || spatial_editor->get_tool_mode() == Node3DEditor::TOOL_MODE_ROTATE) {
+					if ((spatial_editor->get_tool_mode() == Node3DEditor::TOOL_MODE_SELECT && b->get_command()) || spatial_editor->get_tool_mode() == Node3DEditor::TOOL_MODE_ROTATE) {
 						/* HANDLE ROTATION */
 						if (get_selected_count() == 0) {
 							break; //bye
@@ -2215,6 +2215,12 @@ void Node3DEditorViewport::scale_cursor_distance(real_t scale) {
 		cursor.distance = CLAMP(cursor.distance * scale, min_distance, max_distance);
 	}
 
+	if (cursor.distance == max_distance || cursor.distance == min_distance) {
+		zoom_failed_attempts_count++;
+	} else {
+		zoom_failed_attempts_count = 0;
+	}
+
 	zoom_indicator_delay = ZOOM_FREELOOK_INDICATOR_DELAY_S;
 	surface->update();
 }
@@ -2396,6 +2402,7 @@ void Node3DEditorViewport::_notification(int p_what) {
 			zoom_indicator_delay -= delta;
 			if (zoom_indicator_delay <= 0) {
 				surface->update();
+				zoom_limit_label->hide();
 			}
 		}
 
@@ -2535,6 +2542,8 @@ void Node3DEditorViewport::_notification(int p_what) {
 				cpu_time += cpu_time_history[i];
 			}
 			cpu_time /= FRAME_TIME_HISTORY;
+			// Prevent unrealistically low values.
+			cpu_time = MAX(0.01, cpu_time);
 
 			gpu_time_history[gpu_time_history_index] = RS::get_singleton()->viewport_get_measured_render_time_gpu(viewport->get_viewport_rid());
 			gpu_time_history_index = (gpu_time_history_index + 1) % FRAME_TIME_HISTORY;
@@ -2543,16 +2552,19 @@ void Node3DEditorViewport::_notification(int p_what) {
 				gpu_time += gpu_time_history[i];
 			}
 			gpu_time /= FRAME_TIME_HISTORY;
+			// Prevent division by zero for the FPS counter (and unrealistically low values).
+			// This limits the reported FPS to 100000.
+			gpu_time = MAX(0.01, gpu_time);
 
 			// Color labels depending on performance level ("good" = green, "OK" = yellow, "bad" = red).
 			// Middle point is at 15 ms.
-			cpu_time_label->set_text(vformat(TTR("CPU Time: %s ms"), String::num(cpu_time, 1)));
+			cpu_time_label->set_text(vformat(TTR("CPU Time: %s ms"), rtos(cpu_time).pad_decimals(1)));
 			cpu_time_label->add_theme_color_override(
 					"font_color",
 					frame_time_gradient->get_color_at_offset(
 							Math::range_lerp(cpu_time, 0, 30, 0, 1)));
 
-			gpu_time_label->set_text(vformat(TTR("GPU Time: %s ms"), String::num(gpu_time, 1)));
+			gpu_time_label->set_text(vformat(TTR("GPU Time: %s ms"), rtos(gpu_time).pad_decimals(1)));
 			// Middle point is at 15 ms.
 			gpu_time_label->add_theme_color_override(
 					"font_color",
@@ -2770,6 +2782,7 @@ void Node3DEditorViewport::_draw() {
 
 			} else {
 				// Show zoom
+				zoom_limit_label->set_visible(zoom_failed_attempts_count > 15);
 
 				real_t min_distance = MAX(camera->get_near() * 4, ZOOM_FREELOOK_MIN);
 				real_t max_distance = MIN(camera->get_far() / 4, ZOOM_FREELOOK_MAX);
@@ -3647,9 +3660,9 @@ Vector3 Node3DEditorViewport::_get_instance_position(const Point2 &p_pos) const
 AABB Node3DEditorViewport::_calculate_spatial_bounds(const Node3D *p_parent, bool p_exclude_top_level_transform) {
 	AABB bounds;
 
-	const MeshInstance3D *mesh_instance = Object::cast_to<MeshInstance3D>(p_parent);
-	if (mesh_instance) {
-		bounds = mesh_instance->get_aabb();
+	const VisualInstance3D *visual_instance = Object::cast_to<VisualInstance3D>(p_parent);
+	if (visual_instance) {
+		bounds = visual_instance->get_aabb();
 	}
 
 	for (int i = 0; i < p_parent->get_child_count(); i++) {
@@ -4132,6 +4145,15 @@ Node3DEditorViewport::Node3DEditorViewport(Node3DEditor *p_spatial_editor, Edito
 	locked_label->set_text(TTR("View Rotation Locked"));
 	locked_label->hide();
 
+	zoom_limit_label = memnew(Label);
+	zoom_limit_label->set_anchors_and_offsets_preset(LayoutPreset::PRESET_BOTTOM_LEFT);
+	zoom_limit_label->set_offset(Side::SIDE_TOP, -28 * EDSCALE);
+	zoom_limit_label->set_text(TTR("To zoom further, change the camera's clipping planes (View -> Settings...)"));
+	zoom_limit_label->set_name("ZoomLimitMessageLabel");
+	zoom_limit_label->add_theme_color_override("font_color", Color(1, 1, 1, 1));
+	zoom_limit_label->hide();
+	surface->add_child(zoom_limit_label);
+
 	frame_time_gradient = memnew(Gradient);
 	// The color is set when the theme changes.
 	frame_time_gradient->add_point(0.5, Color());
diff --git a/editor/plugins/node_3d_editor_plugin.h b/editor/plugins/node_3d_editor_plugin.h
index ff4a941b06..70329f90c7 100644
--- a/editor/plugins/node_3d_editor_plugin.h
+++ b/editor/plugins/node_3d_editor_plugin.h
@@ -295,6 +295,7 @@ private:
 	Label *info_label;
 	Label *cinema_label;
 	Label *locked_label;
+	Label *zoom_limit_label;
 
 	VBoxContainer *top_right_vbox;
 	ViewportRotationControl *rotation_control;
@@ -418,6 +419,7 @@ private:
 	void scale_freelook_speed(real_t scale);
 
 	real_t zoom_indicator_delay;
+	int zoom_failed_attempts_count = 0;
 
 	RID move_gizmo_instance[3], move_plane_gizmo_instance[3], rotate_gizmo_instance[4], scale_gizmo_instance[3], scale_plane_gizmo_instance[3];
 
diff --git a/editor/plugins/script_text_editor.cpp b/editor/plugins/script_text_editor.cpp
index 3534809891..c982207224 100644
--- a/editor/plugins/script_text_editor.cpp
+++ b/editor/plugins/script_text_editor.cpp
@@ -1721,6 +1721,9 @@ void ScriptTextEditor::_enable_code_editor() {
 		color_picker->set_raw_mode(true);
 	}
 
+	int picker_shape = EDITOR_GET("interface/inspector/default_color_picker_shape");
+	color_picker->set_picker_shape((ColorPicker::PickerShapeType)picker_shape);
+
 	quick_open = memnew(ScriptEditorQuickOpen);
 	quick_open->connect("goto_line", callable_mp(this, &ScriptTextEditor::_goto_line));
 	add_child(quick_open);
diff --git a/editor/plugins/skeleton_3d_editor_plugin.cpp b/editor/plugins/skeleton_3d_editor_plugin.cpp
index ad60984ad1..404ef62eca 100644
--- a/editor/plugins/skeleton_3d_editor_plugin.cpp
+++ b/editor/plugins/skeleton_3d_editor_plugin.cpp
@@ -515,6 +515,8 @@ void Skeleton3DEditor::_joint_tree_selection_changed() {
 		rest_editor->set_target(bone_path + "rest");
 		custom_pose_editor->set_target(bone_path + "custom_pose");
 
+		_update_properties();
+
 		pose_editor->set_visible(true);
 		rest_editor->set_visible(true);
 		custom_pose_editor->set_visible(true);
diff --git a/editor/property_editor.cpp b/editor/property_editor.cpp
index 0a4f432e4a..1a010b9168 100644
--- a/editor/property_editor.cpp
+++ b/editor/property_editor.cpp
@@ -832,6 +832,9 @@ bool CustomPropertyEditor::edit(Object *p_owner, const String &p_name, Variant::
 				} else if (default_color_mode == 2) {
 					color_picker->set_raw_mode(true);
 				}
+
+				int picker_shape = EDITOR_GET("interface/inspector/default_color_picker_shape");
+				color_picker->set_picker_shape((ColorPicker::PickerShapeType)picker_shape);
 			}
 
 			color_picker->show();
diff --git a/editor/scene_tree_dock.cpp b/editor/scene_tree_dock.cpp
index c62e9cbe5f..5e6ebc22a3 100644
--- a/editor/scene_tree_dock.cpp
+++ b/editor/scene_tree_dock.cpp
@@ -3095,6 +3095,7 @@ SceneTreeDock::SceneTreeDock(EditorNode *p_editor, Node *p_scene_root, EditorSel
 	edit_remote->set_h_size_flags(SIZE_EXPAND_FILL);
 	edit_remote->set_text(TTR("Remote"));
 	edit_remote->set_toggle_mode(true);
+	edit_remote->set_tooltip(TTR("If selected, the Remote scene tree dock will cause the project to stutter every time it updates.\nSwitch back to the Local scene tree dock to improve performance."));
 	edit_remote->connect("pressed", callable_mp(this, &SceneTreeDock::_remote_tree_selected));
 
 	edit_local = memnew(Button);
diff --git a/editor/scene_tree_editor.cpp b/editor/scene_tree_editor.cpp
index b5e9aec854..ec37fa53b3 100644
--- a/editor/scene_tree_editor.cpp
+++ b/editor/scene_tree_editor.cpp
@@ -120,7 +120,7 @@ void SceneTreeEditor::_cell_button_pressed(Object *p_item, int p_column, int p_i
 		}
 		undo_redo->commit_action();
 	} else if (p_id == BUTTON_WARNING) {
-		String config_err = n->get_configuration_warning();
+		String config_err = n->get_configuration_warnings_as_string();
 		if (config_err == String()) {
 			return;
 		}
@@ -252,9 +252,9 @@ bool SceneTreeEditor::_add_nodes(Node *p_node, TreeItem *p_parent, bool p_scroll
 
 	if (can_rename) { //should be can edit..
 
-		String warning = p_node->get_configuration_warning();
+		String warning = p_node->get_configuration_warnings_as_string();
 		if (!warning.is_empty()) {
-			item->add_button(0, get_theme_icon("NodeWarning", "EditorIcons"), BUTTON_WARNING, false, TTR("Node configuration warning:") + "\n" + p_node->get_configuration_warning());
+			item->add_button(0, get_theme_icon("NodeWarning", "EditorIcons"), BUTTON_WARNING, false, TTR("Node configuration warning:") + "\n" + warning);
 		}
 
 		int num_connections = p_node->get_persistent_signal_connection_count();
diff --git a/editor/settings_config_dialog.cpp b/editor/settings_config_dialog.cpp
index 3be2136a20..81af4996ed 100644
--- a/editor/settings_config_dialog.cpp
+++ b/editor/settings_config_dialog.cpp
@@ -393,9 +393,10 @@ void EditorSettingsDialog::_shortcut_button_pressed(Object *p_item, int p_column
 	TreeItem *ti = Object::cast_to<TreeItem>(p_item);
 	ERR_FAIL_COND(!ti);
 
+	button_idx = p_idx;
+
 	if (ti->get_metadata(0) == "Common") {
 		// Editing a Built-in action, which can have multiple bindings.
-		button_idx = p_idx;
 		editing_action = true;
 		current_action = ti->get_text(0);
 
diff --git a/main/main.cpp b/main/main.cpp
index 4635d377c4..4103fad17c 100644
--- a/main/main.cpp
+++ b/main/main.cpp
@@ -2537,10 +2537,10 @@ bool Main::iteration() {
 	if (frame > 1000000) {
 		if (editor || project_manager) {
 			if (print_fps) {
-				print_line("Editor FPS: " + itos(frames));
+				print_line(vformat("Editor FPS: %d (%s mspf)", frames, rtos(1000.0 / frames).pad_decimals(1)));
 			}
 		} else if (GLOBAL_GET("debug/settings/stdout/print_fps") || print_fps) {
-			print_line("Game FPS: " + itos(frames));
+			print_line(vformat("Project FPS: %d (%s mspf)", frames, rtos(1000.0 / frames).pad_decimals(1)));
 		}
 
 		Engine::get_singleton()->_fps = frames;
diff --git a/misc/dist/osx_template.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json b/misc/dist/osx_template.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
index 6bf2edb02d..c4f8f71d0e 100644
--- a/misc/dist/osx_template.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
+++ b/misc/dist/osx_template.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
@@ -2,6 +2,6 @@
     "file_format_version" : "1.0.0",
     "ICD": {
         "library_path": "../../../Frameworks/libMoltenVK.dylib",
-        "api_version" : "1.0.0"
+        "api_version" : "1.1.0"
     }
 }
diff --git a/misc/dist/osx_tools.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json b/misc/dist/osx_tools.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
index 6bf2edb02d..c4f8f71d0e 100644
--- a/misc/dist/osx_tools.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
+++ b/misc/dist/osx_tools.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
@@ -2,6 +2,6 @@
     "file_format_version" : "1.0.0",
     "ICD": {
         "library_path": "../../../Frameworks/libMoltenVK.dylib",
-        "api_version" : "1.0.0"
+        "api_version" : "1.1.0"
     }
 }
diff --git a/modules/etc/SCsub b/modules/etc/SCsub
deleted file mode 100644
index 9b46f17916..0000000000
--- a/modules/etc/SCsub
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python
-
-Import("env")
-Import("env_modules")
-
-env_etc = env_modules.Clone()
-
-# Thirdparty source files
-
-thirdparty_obj = []
-
-# Not unbundled so far since not widespread as shared library
-thirdparty_dir = "#thirdparty/etc2comp/"
-thirdparty_sources = [
-    "EtcBlock4x4.cpp",
-    "EtcBlock4x4Encoding.cpp",
-    "EtcBlock4x4Encoding_ETC1.cpp",
-    "EtcBlock4x4Encoding_R11.cpp",
-    "EtcBlock4x4Encoding_RG11.cpp",
-    "EtcBlock4x4Encoding_RGB8A1.cpp",
-    "EtcBlock4x4Encoding_RGB8.cpp",
-    "EtcBlock4x4Encoding_RGBA8.cpp",
-    "Etc.cpp",
-    "EtcDifferentialTrys.cpp",
-    "EtcFilter.cpp",
-    "EtcImage.cpp",
-    "EtcIndividualTrys.cpp",
-    "EtcMath.cpp",
-    "EtcSortedBlockList.cpp",
-]
-thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
-
-env_etc.Prepend(CPPPATH=[thirdparty_dir])
-
-env_thirdparty = env_etc.Clone()
-env_thirdparty.disable_warnings()
-env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
-env.modules_sources += thirdparty_obj
-
-# Godot source files
-
-module_obj = []
-
-env_etc.add_source_files(module_obj, "*.cpp")
-env.modules_sources += module_obj
-
-# Needed to force rebuilding the module files when the thirdparty library is updated.
-env.Depends(module_obj, thirdparty_obj)
diff --git a/modules/etc/image_compress_etc.cpp b/modules/etc/image_compress_etc.cpp
deleted file mode 100644
index 41cbbe3f54..0000000000
--- a/modules/etc/image_compress_etc.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-/*************************************************************************/
-/*  image_compress_etc.cpp                                               */
-/*************************************************************************/
-/*                       This file is part of:                           */
-/*                           GODOT ENGINE                                */
-/*                      https://godotengine.org                          */
-/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
-/*                                                                       */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the       */
-/* "Software"), to deal in the Software without restriction, including   */
-/* without limitation the rights to use, copy, modify, merge, publish,   */
-/* distribute, sublicense, and/or sell copies of the Software, and to    */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions:                                             */
-/*                                                                       */
-/* The above copyright notice and this permission notice shall be        */
-/* included in all copies or substantial portions of the Software.       */
-/*                                                                       */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
-/*************************************************************************/
-
-#include "image_compress_etc.h"
-
-#include "core/io/image.h"
-#include "core/os/copymem.h"
-#include "core/os/os.h"
-#include "core/string/print_string.h"
-
-#include <Etc.h>
-#include <EtcFilter.h>
-
-static Image::Format _get_etc2_mode(Image::UsedChannels format) {
-	switch (format) {
-		case Image::USED_CHANNELS_R:
-			return Image::FORMAT_ETC2_R11;
-
-		case Image::USED_CHANNELS_RG:
-			return Image::FORMAT_ETC2_RG11;
-
-		case Image::USED_CHANNELS_RGB:
-			return Image::FORMAT_ETC2_RGB8;
-
-		case Image::USED_CHANNELS_RGBA:
-			return Image::FORMAT_ETC2_RGBA8;
-
-		// TODO: would be nice if we could use FORMAT_ETC2_RGB8A1 for FORMAT_RGBA5551
-		default:
-			// TODO: Kept for compatibility, but should be investigated whether it's correct or if it should error out
-			return Image::FORMAT_ETC2_RGBA8;
-	}
-}
-
-static Etc::Image::Format _image_format_to_etc2comp_format(Image::Format format) {
-	switch (format) {
-		case Image::FORMAT_ETC:
-			return Etc::Image::Format::ETC1;
-
-		case Image::FORMAT_ETC2_R11:
-			return Etc::Image::Format::R11;
-
-		case Image::FORMAT_ETC2_R11S:
-			return Etc::Image::Format::SIGNED_R11;
-
-		case Image::FORMAT_ETC2_RG11:
-			return Etc::Image::Format::RG11;
-
-		case Image::FORMAT_ETC2_RG11S:
-			return Etc::Image::Format::SIGNED_RG11;
-
-		case Image::FORMAT_ETC2_RGB8:
-			return Etc::Image::Format::RGB8;
-
-		case Image::FORMAT_ETC2_RGBA8:
-			return Etc::Image::Format::RGBA8;
-
-		case Image::FORMAT_ETC2_RGB8A1:
-			return Etc::Image::Format::RGB8A1;
-
-		default:
-			ERR_FAIL_V(Etc::Image::Format::UNKNOWN);
-	}
-}
-
-static void _compress_etc(Image *p_img, float p_lossy_quality, bool force_etc1_format, Image::UsedChannels p_channels) {
-	Image::Format img_format = p_img->get_format();
-
-	if (img_format >= Image::FORMAT_DXT1) {
-		return; //do not compress, already compressed
-	}
-
-	if (img_format > Image::FORMAT_RGBA8) {
-		// TODO: we should be able to handle FORMAT_RGBA4444 and FORMAT_RGBA5551 eventually
-		return;
-	}
-
-	// FIXME: Commented out during Vulkan rebase.
-	/*
-	if (force_etc1_format) {
-		// If VRAM compression is using ETC, but image has alpha, convert to RGBA4444 or LA8
-		// This saves space while maintaining the alpha channel
-		if (detected_channels == Image::USED_CHANNELS_RGBA) {
-			if (p_img->has_mipmaps()) {
-				// Image doesn't support mipmaps with RGBA4444 textures
-				p_img->clear_mipmaps();
-			}
-			p_img->convert(Image::FORMAT_RGBA4444);
-			return;
-		} else if (detected_channels == Image::USE_CHANNELS_LA) {
-			p_img->convert(Image::FORMAT_LA8);
-			return;
-		}
-	}
-	*/
-
-	uint32_t imgw = p_img->get_width(), imgh = p_img->get_height();
-
-	Image::Format etc_format = force_etc1_format ? Image::FORMAT_ETC : _get_etc2_mode(p_channels);
-
-	Ref<Image> img = p_img->duplicate();
-
-	if (img->get_format() != Image::FORMAT_RGBA8) {
-		img->convert(Image::FORMAT_RGBA8); //still uses RGBA to convert
-	}
-
-	if (img->has_mipmaps()) {
-		if (next_power_of_2(imgw) != imgw || next_power_of_2(imgh) != imgh) {
-			img->resize_to_po2();
-			imgw = img->get_width();
-			imgh = img->get_height();
-		}
-	} else {
-		if (imgw % 4 != 0 || imgh % 4 != 0) {
-			if (imgw % 4) {
-				imgw += 4 - imgw % 4;
-			}
-			if (imgh % 4) {
-				imgh += 4 - imgh % 4;
-			}
-
-			img->resize(imgw, imgh);
-		}
-	}
-
-	const uint8_t *r = img->get_data().ptr();
-	ERR_FAIL_COND(!r);
-
-	unsigned int target_size = Image::get_image_data_size(imgw, imgh, etc_format, p_img->has_mipmaps());
-	int mmc = 1 + (p_img->has_mipmaps() ? Image::get_image_required_mipmaps(imgw, imgh, etc_format) : 0);
-
-	Vector<uint8_t> dst_data;
-	dst_data.resize(target_size);
-
-	uint8_t *w = dst_data.ptrw();
-
-	// prepare parameters to be passed to etc2comp
-	int num_cpus = OS::get_singleton()->get_processor_count();
-	int encoding_time = 0;
-	float effort = 0.0; //default, reasonable time
-
-	if (p_lossy_quality > 0.95) {
-		effort = 80;
-	} else if (p_lossy_quality > 0.85) {
-		effort = 60;
-	} else if (p_lossy_quality > 0.75) {
-		effort = 40;
-	}
-
-	Etc::ErrorMetric error_metric = Etc::ErrorMetric::RGBX; // NOTE: we can experiment with other error metrics
-	Etc::Image::Format etc2comp_etc_format = _image_format_to_etc2comp_format(etc_format);
-
-	int wofs = 0;
-
-	print_verbose("ETC: Begin encoding, format: " + Image::get_format_name(etc_format));
-	uint64_t t = OS::get_singleton()->get_ticks_msec();
-	for (int i = 0; i < mmc; i++) {
-		// convert source image to internal etc2comp format (which is equivalent to Image::FORMAT_RGBAF)
-		// NOTE: We can alternatively add a case to Image::convert to handle Image::FORMAT_RGBAF conversion.
-		int mipmap_ofs = 0, mipmap_size = 0, mipmap_w = 0, mipmap_h = 0;
-		img->get_mipmap_offset_size_and_dimensions(i, mipmap_ofs, mipmap_size, mipmap_w, mipmap_h);
-		const uint8_t *src = &r[mipmap_ofs];
-
-		Etc::ColorFloatRGBA *src_rgba_f = new Etc::ColorFloatRGBA[mipmap_w * mipmap_h];
-		for (int j = 0; j < mipmap_w * mipmap_h; j++) {
-			int si = j * 4; // RGBA8
-			src_rgba_f[j] = Etc::ColorFloatRGBA::ConvertFromRGBA8(src[si], src[si + 1], src[si + 2], src[si + 3]);
-		}
-
-		unsigned char *etc_data = nullptr;
-		unsigned int etc_data_len = 0;
-		unsigned int extended_width = 0, extended_height = 0;
-		Etc::Encode((float *)src_rgba_f, mipmap_w, mipmap_h, etc2comp_etc_format, error_metric, effort, num_cpus, num_cpus, &etc_data, &etc_data_len, &extended_width, &extended_height, &encoding_time);
-
-		CRASH_COND(wofs + etc_data_len > target_size);
-		memcpy(&w[wofs], etc_data, etc_data_len);
-		wofs += etc_data_len;
-
-		delete[] etc_data;
-		delete[] src_rgba_f;
-	}
-
-	print_verbose("ETC: Time encoding: " + rtos(OS::get_singleton()->get_ticks_msec() - t));
-
-	p_img->create(imgw, imgh, p_img->has_mipmaps(), etc_format, dst_data);
-}
-
-static void _compress_etc1(Image *p_img, float p_lossy_quality) {
-	_compress_etc(p_img, p_lossy_quality, true, Image::USED_CHANNELS_RGB);
-}
-
-static void _compress_etc2(Image *p_img, float p_lossy_quality, Image::UsedChannels p_channels) {
-	_compress_etc(p_img, p_lossy_quality, false, p_channels);
-}
-
-void _register_etc_compress_func() {
-	Image::_image_compress_etc1_func = _compress_etc1;
-	Image::_image_compress_etc2_func = _compress_etc2;
-}
diff --git a/modules/etc/texture_loader_pkm.cpp b/modules/etc/texture_loader_pkm.cpp
deleted file mode 100644
index 95db9315d5..0000000000
--- a/modules/etc/texture_loader_pkm.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*************************************************************************/
-/*  texture_loader_pkm.cpp                                               */
-/*************************************************************************/
-/*                       This file is part of:                           */
-/*                           GODOT ENGINE                                */
-/*                      https://godotengine.org                          */
-/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
-/*                                                                       */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the       */
-/* "Software"), to deal in the Software without restriction, including   */
-/* without limitation the rights to use, copy, modify, merge, publish,   */
-/* distribute, sublicense, and/or sell copies of the Software, and to    */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions:                                             */
-/*                                                                       */
-/* The above copyright notice and this permission notice shall be        */
-/* included in all copies or substantial portions of the Software.       */
-/*                                                                       */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
-/*************************************************************************/
-
-#include "texture_loader_pkm.h"
-
-#include "core/os/file_access.h"
-#include <string.h>
-
-struct ETC1Header {
-	char tag[6]; // "PKM 10"
-	uint16_t format = 0; // Format == number of mips (== zero)
-	uint16_t texWidth = 0; // Texture dimensions, multiple of 4 (big-endian)
-	uint16_t texHeight = 0;
-	uint16_t origWidth = 0; // Original dimensions (big-endian)
-	uint16_t origHeight = 0;
-};
-
-RES ResourceFormatPKM::load(const String &p_path, const String &p_original_path, Error *r_error, bool p_use_sub_threads, float *r_progress, CacheMode p_cache_mode) {
-	if (r_error) {
-		*r_error = ERR_CANT_OPEN;
-	}
-
-	Error err;
-	FileAccess *f = FileAccess::open(p_path, FileAccess::READ, &err);
-	if (!f) {
-		return RES();
-	}
-
-	FileAccessRef fref(f);
-	if (r_error) {
-		*r_error = ERR_FILE_CORRUPT;
-	}
-
-	ERR_FAIL_COND_V_MSG(err != OK, RES(), "Unable to open PKM texture file '" + p_path + "'.");
-
-	// big endian
-	f->set_endian_swap(true);
-
-	ETC1Header h;
-	f->get_buffer((uint8_t *)&h.tag, sizeof(h.tag));
-	ERR_FAIL_COND_V_MSG(strncmp(h.tag, "PKM 10", sizeof(h.tag)), RES(), "Invalid or unsupported PKM texture file '" + p_path + "'.");
-
-	h.format = f->get_16();
-	h.texWidth = f->get_16();
-	h.texHeight = f->get_16();
-	h.origWidth = f->get_16();
-	h.origHeight = f->get_16();
-
-	Vector<uint8_t> src_data;
-
-	uint32_t size = h.texWidth * h.texHeight / 2;
-	src_data.resize(size);
-	uint8_t *wb = src_data.ptrw();
-	f->get_buffer(wb, size);
-
-	int mipmaps = h.format;
-	int width = h.origWidth;
-	int height = h.origHeight;
-
-	Ref<Image> img = memnew(Image(width, height, mipmaps, Image::FORMAT_ETC, src_data));
-
-	Ref<ImageTexture> texture = memnew(ImageTexture);
-	texture->create_from_image(img);
-
-	if (r_error) {
-		*r_error = OK;
-	}
-
-	f->close();
-	memdelete(f);
-	return texture;
-}
-
-void ResourceFormatPKM::get_recognized_extensions(List<String> *p_extensions) const {
-	p_extensions->push_back("pkm");
-}
-
-bool ResourceFormatPKM::handles_type(const String &p_type) const {
-	return ClassDB::is_parent_class(p_type, "Texture2D");
-}
-
-String ResourceFormatPKM::get_resource_type(const String &p_path) const {
-	if (p_path.get_extension().to_lower() == "pkm") {
-		return "ImageTexture";
-	}
-	return "";
-}
diff --git a/modules/etc/texture_loader_pkm.h b/modules/etc/texture_loader_pkm.h
deleted file mode 100644
index 2ed5e75807..0000000000
--- a/modules/etc/texture_loader_pkm.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*************************************************************************/
-/*  texture_loader_pkm.h                                                 */
-/*************************************************************************/
-/*                       This file is part of:                           */
-/*                           GODOT ENGINE                                */
-/*                      https://godotengine.org                          */
-/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
-/*                                                                       */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the       */
-/* "Software"), to deal in the Software without restriction, including   */
-/* without limitation the rights to use, copy, modify, merge, publish,   */
-/* distribute, sublicense, and/or sell copies of the Software, and to    */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions:                                             */
-/*                                                                       */
-/* The above copyright notice and this permission notice shall be        */
-/* included in all copies or substantial portions of the Software.       */
-/*                                                                       */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
-/*************************************************************************/
-
-#ifndef TEXTURE_LOADER_PKM_H
-#define TEXTURE_LOADER_PKM_H
-
-#include "core/io/resource_loader.h"
-#include "scene/resources/texture.h"
-
-class ResourceFormatPKM : public ResourceFormatLoader {
-public:
-	virtual RES load(const String &p_path, const String &p_original_path = "", Error *r_error = nullptr, bool p_use_sub_threads = false, float *r_progress = nullptr, CacheMode p_cache_mode = CACHE_MODE_REUSE);
-	virtual void get_recognized_extensions(List<String> *p_extensions) const;
-	virtual bool handles_type(const String &p_type) const;
-	virtual String get_resource_type(const String &p_path) const;
-
-	virtual ~ResourceFormatPKM() {}
-};
-
-#endif // TEXTURE_LOADER_PKM_H
diff --git a/modules/etcpak/SCsub b/modules/etcpak/SCsub
new file mode 100644
index 0000000000..821c6801b4
--- /dev/null
+++ b/modules/etcpak/SCsub
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+Import("env")
+Import("env_modules")
+
+env_etcpak = env_modules.Clone()
+
+# Thirdparty source files
+
+thirdparty_obj = []
+
+thirdparty_dir = "#thirdparty/etcpak/"
+thirdparty_sources = [
+    "Bitmap.cpp",
+    "BitmapDownsampled.cpp",
+    "BlockData.cpp",
+    "ColorSpace.cpp",
+    "DataProvider.cpp",
+    "Debug.cpp",
+    "Dither.cpp",
+    "Error.cpp",
+    "mmap.cpp",
+    "ProcessDxtc.cpp",
+    "ProcessRGB.cpp",
+    "System.cpp",
+    "Tables.cpp",
+    "TaskDispatch.cpp",
+    "Timing.cpp",
+    "lz4/lz4.c",
+]
+thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
+
+env_etcpak.Prepend(CPPPATH=[thirdparty_dir])
+
+# Also requires libpng headers
+if env["builtin_libpng"]:
+    env_etcpak.Prepend(CPPPATH=["#thirdparty/libpng"])
+
+env_thirdparty = env_etcpak.Clone()
+env_thirdparty.disable_warnings()
+env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
+env.modules_sources += thirdparty_obj
+
+# Godot source files
+
+module_obj = []
+
+env_etcpak.add_source_files(module_obj, "*.cpp")
+env.modules_sources += module_obj
+
+# Needed to force rebuilding the module files when the thirdparty library is updated.
+env.Depends(module_obj, thirdparty_obj)
diff --git a/modules/etc/config.py b/modules/etcpak/config.py
index 53b8f2f2e3..53b8f2f2e3 100644
--- a/modules/etc/config.py
+++ b/modules/etcpak/config.py
diff --git a/modules/etcpak/image_etcpak.cpp b/modules/etcpak/image_etcpak.cpp
new file mode 100644
index 0000000000..251d2cd7b0
--- /dev/null
+++ b/modules/etcpak/image_etcpak.cpp
@@ -0,0 +1,170 @@
+/*************************************************************************/
+/*  image_etcpak.cpp                                                     */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "image_etcpak.h"
+
+#include "core/os/copymem.h"
+#include "core/os/os.h"
+#include "core/string/print_string.h"
+
+#include "thirdparty/etcpak/ProcessDxtc.hpp"
+#include "thirdparty/etcpak/ProcessRGB.hpp"
+
+// thresholds for the early compression-mode decision scheme in QuickETC2
+// which can be changed by the option -e
+float ecmd_threshold[3] = { 0.03f, 0.09f, 0.38f };
+
+EtcpakType _determine_etc_type(Image::UsedChannels p_source) {
+	switch (p_source) {
+		case Image::USED_CHANNELS_L:
+			return EtcpakType::ETCPAK_TYPE_ETC1;
+		case Image::USED_CHANNELS_LA:
+			return EtcpakType::ETCPAK_TYPE_ETC2_ALPHA;
+		case Image::USED_CHANNELS_R:
+			return EtcpakType::ETCPAK_TYPE_ETC2;
+		case Image::USED_CHANNELS_RG:
+			return EtcpakType::ETCPAK_TYPE_ETC2_RA_AS_RG;
+		case Image::USED_CHANNELS_RGB:
+			return EtcpakType::ETCPAK_TYPE_ETC2;
+		case Image::USED_CHANNELS_RGBA:
+			return EtcpakType::ETCPAK_TYPE_ETC2_ALPHA;
+		default:
+			return EtcpakType::ETCPAK_TYPE_ETC2_ALPHA;
+	}
+}
+
+EtcpakType _determine_dxt_type(Image::UsedChannels p_source) {
+	switch (p_source) {
+		case Image::USED_CHANNELS_L:
+			return EtcpakType::ETCPAK_TYPE_DXT1;
+		case Image::USED_CHANNELS_LA:
+			return EtcpakType::ETCPAK_TYPE_DXT5;
+		case Image::USED_CHANNELS_R:
+			return EtcpakType::ETCPAK_TYPE_DXT5;
+		case Image::USED_CHANNELS_RG:
+			return EtcpakType::ETCPAK_TYPE_DXT5_RA_AS_RG;
+		case Image::USED_CHANNELS_RGB:
+			return EtcpakType::ETCPAK_TYPE_DXT5;
+		case Image::USED_CHANNELS_RGBA:
+			return EtcpakType::ETCPAK_TYPE_DXT5;
+		default:
+			return EtcpakType::ETCPAK_TYPE_DXT5;
+	}
+}
+void _compress_etc2(Image *p_img, float p_lossy_quality, Image::UsedChannels p_source) {
+	EtcpakType type = _determine_etc_type(p_source);
+	_compress_etcpak(type, p_img, p_lossy_quality, false, p_source);
+}
+void _compress_bc(Image *p_img, float p_lossy_quality, Image::UsedChannels p_source) {
+	EtcpakType type = _determine_dxt_type(p_source);
+	_compress_etcpak(type, p_img, p_lossy_quality, false, p_source);
+}
+void _compress_etc1(Image *p_img, float p_lossy_quality) {
+	_compress_etcpak(EtcpakType::ETCPAK_TYPE_ETC1, p_img, p_lossy_quality, true, Image::USED_CHANNELS_RGB);
+}
+
+void _compress_etcpak(EtcpakType p_compresstype, Image *p_img, float p_lossy_quality, bool force_etc1_format, Image::UsedChannels p_channels) {
+	uint64_t t = OS::get_singleton()->get_ticks_msec();
+	Image::Format img_format = p_img->get_format();
+
+	if (img_format >= Image::FORMAT_DXT1) {
+		return; //do not compress, already compressed
+	}
+
+	if (img_format > Image::FORMAT_RGBA8) {
+		// TODO: we should be able to handle FORMAT_RGBA4444 and FORMAT_RGBA5551 eventually
+		return;
+	}
+
+	Image::Format format = Image::FORMAT_RGBA8;
+	if (p_img->get_format() != Image::FORMAT_RGBA8) {
+		p_img->convert(Image::FORMAT_RGBA8);
+	}
+	if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC1 || force_etc1_format) {
+		format = Image::FORMAT_ETC;
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2) {
+		format = Image::FORMAT_ETC2_RGB8;
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2_RA_AS_RG) {
+		format = Image::FORMAT_ETC2_RA_AS_RG;
+		p_img->convert_rg_to_ra_rgba8();
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2_ALPHA) {
+		format = Image::FORMAT_ETC2_RGBA8;
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT1) {
+		format = Image::FORMAT_DXT1;
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT5_RA_AS_RG) {
+		format = Image::FORMAT_DXT5_RA_AS_RG;
+		p_img->convert_rg_to_ra_rgba8();
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT5) {
+		format = Image::FORMAT_DXT5;
+	} else {
+		ERR_FAIL();
+	}
+
+	const bool mipmap = p_img->has_mipmaps();
+	print_verbose("Encoding format: " + Image::get_format_name(format));
+
+	Ref<Image> new_img;
+	new_img.instance();
+	new_img->create(p_img->get_width(), p_img->get_height(), mipmap, format);
+	Vector<uint8_t> data = new_img->get_data();
+	uint8_t *wr = data.ptrw();
+
+	Ref<Image> image = p_img->duplicate();
+	int mmc = 1 + (mipmap ? Image::get_image_required_mipmaps(new_img->get_width(), new_img->get_height(), format) : 0);
+	for (int i = 0; i < mmc; i++) {
+		int ofs, size, mip_w, mip_h;
+		new_img->get_mipmap_offset_size_and_dimensions(i, ofs, size, mip_w, mip_h);
+		mip_w = (mip_w + 3) & ~3;
+		mip_h = (mip_h + 3) & ~3;
+		Vector<uint8_t> dst_data;
+		dst_data.resize(size);
+		int mipmap_ofs = image->get_mipmap_offset(i);
+
+		const uint32_t *image_read = (const uint32_t *)&image->get_data().ptr()[mipmap_ofs];
+		uint64_t *dst_write = (uint64_t *)dst_data.ptrw();
+		if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC1 || force_etc1_format) {
+			CompressEtc1RgbDither(image_read, dst_write, mip_w * mip_h / 16, mip_w);
+		} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2 || p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2_RA_AS_RG) {
+			CompressEtc2Rgb(image_read, dst_write, mip_w * mip_h / 16, mip_w);
+		} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2_ALPHA) {
+			CompressEtc2Rgba(image_read, dst_write, mip_w * mip_h / 16, mip_w);
+		} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT5 || p_compresstype == EtcpakType::ETCPAK_TYPE_DXT5_RA_AS_RG) {
+			CompressDxt5(image_read, dst_write, mip_w * mip_h / 16, mip_w);
+		} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT1) {
+			CompressDxt1Dither(image_read, dst_write, mip_w * mip_h / 16, mip_w);
+		} else {
+			ERR_FAIL();
+		}
+		copymem(&wr[ofs], dst_data.ptr(), size);
+	}
+	p_img->create(new_img->get_width(), new_img->get_height(), mipmap, format, data);
+
+	print_verbose(vformat("ETCPAK encode took %s ms.", rtos(OS::get_singleton()->get_ticks_msec() - t)));
+}
diff --git a/modules/etc/register_types.cpp b/modules/etcpak/image_etcpak.h
index b165bccb3e..0137bab7cc 100644
--- a/modules/etc/register_types.cpp
+++ b/modules/etcpak/image_etcpak.h
@@ -1,5 +1,5 @@
 /*************************************************************************/
-/*  register_types.cpp                                                   */
+/*  image_etcpak.h                                                       */
 /*************************************************************************/
 /*                       This file is part of:                           */
 /*                           GODOT ENGINE                                */
@@ -28,21 +28,24 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#include "register_types.h"
+#ifndef IMAGE_ETCPAK_H
+#define IMAGE_ETCPAK_H
 
-#include "image_compress_etc.h"
-#include "texture_loader_pkm.h"
+#include "core/io/image.h"
 
-static Ref<ResourceFormatPKM> resource_loader_pkm;
+enum class EtcpakType {
+	ETCPAK_TYPE_ETC1,
+	ETCPAK_TYPE_ETC2,
+	ETCPAK_TYPE_ETC2_ALPHA,
+	ETCPAK_TYPE_ETC2_RA_AS_RG,
+	ETCPAK_TYPE_DXT1,
+	ETCPAK_TYPE_DXT5,
+	ETCPAK_TYPE_DXT5_RA_AS_RG,
+};
 
-void register_etc_types() {
-	resource_loader_pkm.instance();
-	ResourceLoader::add_resource_format_loader(resource_loader_pkm);
+void _compress_etcpak(EtcpakType p_compresstype, Image *p_img, float p_lossy_quality, bool force_etc1_format, Image::UsedChannels p_channels);
+void _compress_etc1(Image *p_img, float p_lossy_quality);
+void _compress_etc2(Image *p_img, float p_lossy_quality, Image::UsedChannels p_source);
+void _compress_bc(Image *p_img, float p_lossy_quality, Image::UsedChannels p_source);
 
-	_register_etc_compress_func();
-}
-
-void unregister_etc_types() {
-	ResourceLoader::remove_resource_format_loader(resource_loader_pkm);
-	resource_loader_pkm.unref();
-}
+#endif // IMAGE_ETCPAK_H
diff --git a/modules/etc/image_compress_etc.h b/modules/etcpak/register_types.cpp
index 44a06194e9..fcc0bc8b6f 100644
--- a/modules/etc/image_compress_etc.h
+++ b/modules/etcpak/register_types.cpp
@@ -1,5 +1,5 @@
 /*************************************************************************/
-/*  image_compress_etc.h                                                 */
+/*  register_types.cpp                                                   */
 /*************************************************************************/
 /*                       This file is part of:                           */
 /*                           GODOT ENGINE                                */
@@ -28,9 +28,15 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#ifndef IMAGE_COMPRESS_ETC_H
-#define IMAGE_COMPRESS_ETC_H
+#include "register_types.h"
 
-void _register_etc_compress_func();
+#include "image_etcpak.h"
 
-#endif // IMAGE_COMPRESS_ETC_H
+void register_etcpak_types() {
+	Image::_image_compress_etc1_func = _compress_etc1;
+	Image::_image_compress_etc2_func = _compress_etc2;
+	Image::_image_compress_bc_func = _compress_bc;
+}
+
+void unregister_etcpak_types() {
+}
diff --git a/modules/etc/register_types.h b/modules/etcpak/register_types.h
index e8cbb635ae..9b300a3275 100644
--- a/modules/etc/register_types.h
+++ b/modules/etcpak/register_types.h
@@ -28,10 +28,5 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#ifndef ETC_REGISTER_TYPES_H
-#define ETC_REGISTER_TYPES_H
-
-void register_etc_types();
-void unregister_etc_types();
-
-#endif // ETC_REGISTER_TYPES_H
+void register_etcpak_types();
+void unregister_etcpak_types();
diff --git a/modules/gdscript/gdscript_byte_codegen.cpp b/modules/gdscript/gdscript_byte_codegen.cpp
index ec1116197e..b553dcede3 100644
--- a/modules/gdscript/gdscript_byte_codegen.cpp
+++ b/modules/gdscript/gdscript_byte_codegen.cpp
@@ -59,12 +59,7 @@ uint32_t GDScriptByteCodeGenerator::add_local_constant(const StringName &p_name,
 }
 
 uint32_t GDScriptByteCodeGenerator::add_or_get_constant(const Variant &p_constant) {
-	if (constant_map.has(p_constant)) {
-		return constant_map[p_constant];
-	}
-	int index = constant_map.size();
-	constant_map[p_constant] = index;
-	return index;
+	return get_constant_pos(p_constant);
 }
 
 uint32_t GDScriptByteCodeGenerator::add_or_get_name(const StringName &p_name) {
@@ -612,7 +607,8 @@ void GDScriptByteCodeGenerator::write_assign(const Address &p_target, const Addr
 			} break;
 			case GDScriptDataType::NATIVE: {
 				int class_idx = GDScriptLanguage::get_singleton()->get_global_map()[p_target.type.native_type];
-				class_idx |= (GDScriptFunction::ADDR_TYPE_GLOBAL << GDScriptFunction::ADDR_BITS);
+				Variant nc = GDScriptLanguage::get_singleton()->get_global_array()[class_idx];
+				class_idx = get_constant_pos(nc) | (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 				append(GDScriptFunction::OPCODE_ASSIGN_TYPED_NATIVE, 3);
 				append(p_target);
 				append(p_source);
@@ -621,8 +617,7 @@ void GDScriptByteCodeGenerator::write_assign(const Address &p_target, const Addr
 			case GDScriptDataType::SCRIPT:
 			case GDScriptDataType::GDSCRIPT: {
 				Variant script = p_target.type.script_type;
-				int idx = get_constant_pos(script);
-				idx |= (GDScriptFunction::ADDR_TYPE_LOCAL_CONSTANT << GDScriptFunction::ADDR_BITS);
+				int idx = get_constant_pos(script) | (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 
 				append(GDScriptFunction::OPCODE_ASSIGN_TYPED_SCRIPT, 3);
 				append(p_target);
@@ -673,6 +668,12 @@ void GDScriptByteCodeGenerator::write_assign_default_parameter(const Address &p_
 	function->default_arguments.push_back(opcodes.size());
 }
 
+void GDScriptByteCodeGenerator::write_store_named_global(const Address &p_dst, const StringName &p_global) {
+	append(GDScriptFunction::OPCODE_STORE_NAMED_GLOBAL, 1);
+	append(p_dst);
+	append(p_global);
+}
+
 void GDScriptByteCodeGenerator::write_cast(const Address &p_target, const Address &p_source, const GDScriptDataType &p_type) {
 	int index = 0;
 
@@ -683,16 +684,14 @@ void GDScriptByteCodeGenerator::write_cast(const Address &p_target, const Addres
 		} break;
 		case GDScriptDataType::NATIVE: {
 			int class_idx = GDScriptLanguage::get_singleton()->get_global_map()[p_type.native_type];
-			class_idx |= (GDScriptFunction::ADDR_TYPE_GLOBAL << GDScriptFunction::ADDR_BITS);
+			Variant nc = GDScriptLanguage::get_singleton()->get_global_array()[class_idx];
 			append(GDScriptFunction::OPCODE_CAST_TO_NATIVE, 3);
-			index = class_idx;
+			index = get_constant_pos(nc) | (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 		} break;
 		case GDScriptDataType::SCRIPT:
 		case GDScriptDataType::GDSCRIPT: {
 			Variant script = p_type.script_type;
-			int idx = get_constant_pos(script);
-			idx |= (GDScriptFunction::ADDR_TYPE_LOCAL_CONSTANT << GDScriptFunction::ADDR_BITS);
-
+			int idx = get_constant_pos(script) | (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 			append(GDScriptFunction::OPCODE_CAST_TO_SCRIPT, 3);
 			index = idx;
 		} break;
@@ -903,7 +902,7 @@ void GDScriptByteCodeGenerator::write_call_self(const Address &p_target, const S
 	for (int i = 0; i < p_arguments.size(); i++) {
 		append(p_arguments[i]);
 	}
-	append(GDScriptFunction::ADDR_TYPE_SELF << GDScriptFunction::ADDR_BITS);
+	append(GDScriptFunction::ADDR_TYPE_STACK << GDScriptFunction::ADDR_BITS);
 	append(p_target);
 	append(p_arguments.size());
 	append(p_function_name);
@@ -914,7 +913,7 @@ void GDScriptByteCodeGenerator::write_call_self_async(const Address &p_target, c
 	for (int i = 0; i < p_arguments.size(); i++) {
 		append(p_arguments[i]);
 	}
-	append(GDScriptFunction::ADDR_TYPE_SELF << GDScriptFunction::ADDR_BITS);
+	append(GDScriptFunction::ADDR_SELF);
 	append(p_target);
 	append(p_arguments.size());
 	append(p_function_name);
@@ -999,7 +998,7 @@ void GDScriptByteCodeGenerator::write_construct_typed_array(const Address &p_tar
 	if (p_element_type.script_type) {
 		Variant script_type = Ref<Script>(p_element_type.script_type);
 		int addr = get_constant_pos(script_type);
-		addr |= GDScriptFunction::ADDR_TYPE_LOCAL_CONSTANT << GDScriptFunction::ADDR_BITS;
+		addr |= GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS;
 		append(addr);
 	} else {
 		append(Address()); // null.
@@ -1296,8 +1295,7 @@ void GDScriptByteCodeGenerator::write_return(const Address &p_return_value) {
 				const GDScriptDataType &element_type = function->return_type.get_container_element_type();
 
 				Variant script = function->return_type.script_type;
-				int script_idx = get_constant_pos(script);
-				script_idx |= (GDScriptFunction::ADDR_TYPE_LOCAL_CONSTANT << GDScriptFunction::ADDR_BITS);
+				int script_idx = get_constant_pos(script) | (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 
 				append(GDScriptFunction::OPCODE_RETURN_TYPED_ARRAY, 2);
 				append(p_return_value);
@@ -1326,7 +1324,7 @@ void GDScriptByteCodeGenerator::write_return(const Address &p_return_value) {
 
 					Variant script = function->return_type.script_type;
 					int script_idx = get_constant_pos(script);
-					script_idx |= (GDScriptFunction::ADDR_TYPE_LOCAL_CONSTANT << GDScriptFunction::ADDR_BITS);
+					script_idx |= (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 
 					append(GDScriptFunction::OPCODE_RETURN_TYPED_ARRAY, 2);
 					append(p_return_value);
@@ -1343,14 +1341,14 @@ void GDScriptByteCodeGenerator::write_return(const Address &p_return_value) {
 				append(GDScriptFunction::OPCODE_RETURN_TYPED_NATIVE, 2);
 				append(p_return_value);
 				int class_idx = GDScriptLanguage::get_singleton()->get_global_map()[function->return_type.native_type];
-				class_idx |= (GDScriptFunction::ADDR_TYPE_GLOBAL << GDScriptFunction::ADDR_BITS);
+				Variant nc = GDScriptLanguage::get_singleton()->get_global_array()[class_idx];
+				class_idx = get_constant_pos(nc) | (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 				append(class_idx);
 			} break;
 			case GDScriptDataType::GDSCRIPT:
 			case GDScriptDataType::SCRIPT: {
 				Variant script = function->return_type.script_type;
-				int script_idx = get_constant_pos(script);
-				script_idx |= (GDScriptFunction::ADDR_TYPE_LOCAL_CONSTANT << GDScriptFunction::ADDR_BITS);
+				int script_idx = get_constant_pos(script) | (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 
 				append(GDScriptFunction::OPCODE_RETURN_TYPED_SCRIPT, 2);
 				append(p_return_value);
diff --git a/modules/gdscript/gdscript_byte_codegen.h b/modules/gdscript/gdscript_byte_codegen.h
index 6eaec91504..4b196ed420 100644
--- a/modules/gdscript/gdscript_byte_codegen.h
+++ b/modules/gdscript/gdscript_byte_codegen.h
@@ -51,11 +51,11 @@ class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
 	List<Map<StringName, int>> block_identifier_stack;
 	Map<StringName, int> block_identifiers;
 
-	int current_stack_size = 0;
+	int current_stack_size = 3; // First 3 spots are reserved for self, class, and nil.
 	int current_temporaries = 0;
 	int current_locals = 0;
 	int current_line = 0;
-	int stack_max = 0;
+	int stack_max = 3;
 	int instr_args_max = 0;
 	int ptrcall_max = 0;
 
@@ -135,7 +135,7 @@ class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
 			ERR_PRINT("Leaving block with non-zero temporary variables: " + itos(current_temporaries));
 		}
 #endif
-		current_stack_size = current_locals;
+		current_stack_size = current_locals + 3; // Keep the 3 reserved slots for self, class, and nil.
 
 		if (debug_stack) {
 			for (Map<StringName, int>::Element *E = block_identifiers.front(); E; E = E->next()) {
@@ -300,26 +300,19 @@ class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
 	int address_of(const Address &p_address) {
 		switch (p_address.mode) {
 			case Address::SELF:
-				return GDScriptFunction::ADDR_TYPE_SELF << GDScriptFunction::ADDR_BITS;
+				return GDScriptFunction::ADDR_SELF;
 			case Address::CLASS:
-				return GDScriptFunction::ADDR_TYPE_CLASS << GDScriptFunction::ADDR_BITS;
+				return GDScriptFunction::ADDR_CLASS;
 			case Address::MEMBER:
 				return p_address.address | (GDScriptFunction::ADDR_TYPE_MEMBER << GDScriptFunction::ADDR_BITS);
-			case Address::CLASS_CONSTANT:
-				return p_address.address | (GDScriptFunction::ADDR_TYPE_CLASS_CONSTANT << GDScriptFunction::ADDR_BITS);
-			case Address::LOCAL_CONSTANT:
 			case Address::CONSTANT:
-				return p_address.address | (GDScriptFunction::ADDR_TYPE_LOCAL_CONSTANT << GDScriptFunction::ADDR_BITS);
+				return p_address.address | (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 			case Address::LOCAL_VARIABLE:
 			case Address::TEMPORARY:
 			case Address::FUNCTION_PARAMETER:
 				return p_address.address | (GDScriptFunction::ADDR_TYPE_STACK << GDScriptFunction::ADDR_BITS);
-			case Address::GLOBAL:
-				return p_address.address | (GDScriptFunction::ADDR_TYPE_GLOBAL << GDScriptFunction::ADDR_BITS);
-			case Address::NAMED_GLOBAL:
-				return p_address.address | (GDScriptFunction::ADDR_TYPE_NAMED_GLOBAL << GDScriptFunction::ADDR_BITS);
 			case Address::NIL:
-				return GDScriptFunction::ADDR_TYPE_NIL << GDScriptFunction::ADDR_BITS;
+				return GDScriptFunction::ADDR_NIL;
 		}
 		return -1; // Unreachable.
 	}
@@ -441,6 +434,7 @@ public:
 	virtual void write_assign_true(const Address &p_target) override;
 	virtual void write_assign_false(const Address &p_target) override;
 	virtual void write_assign_default_parameter(const Address &p_dst, const Address &p_src) override;
+	virtual void write_store_named_global(const Address &p_dst, const StringName &p_global) override;
 	virtual void write_cast(const Address &p_target, const Address &p_source, const GDScriptDataType &p_type) override;
 	virtual void write_call(const Address &p_target, const Address &p_base, const StringName &p_function_name, const Vector<Address> &p_arguments) override;
 	virtual void write_super_call(const Address &p_target, const StringName &p_function_name, const Vector<Address> &p_arguments) override;
diff --git a/modules/gdscript/gdscript_codegen.h b/modules/gdscript/gdscript_codegen.h
index 3c05f14cf7..cce4e856c7 100644
--- a/modules/gdscript/gdscript_codegen.h
+++ b/modules/gdscript/gdscript_codegen.h
@@ -45,13 +45,9 @@ public:
 			CLASS,
 			MEMBER,
 			CONSTANT,
-			CLASS_CONSTANT,
-			LOCAL_CONSTANT,
 			LOCAL_VARIABLE,
 			FUNCTION_PARAMETER,
 			TEMPORARY,
-			GLOBAL,
-			NAMED_GLOBAL,
 			NIL,
 		};
 		AddressMode mode = NIL;
@@ -123,6 +119,7 @@ public:
 	virtual void write_assign_true(const Address &p_target) = 0;
 	virtual void write_assign_false(const Address &p_target) = 0;
 	virtual void write_assign_default_parameter(const Address &dst, const Address &src) = 0;
+	virtual void write_store_named_global(const Address &p_dst, const StringName &p_global) = 0;
 	virtual void write_cast(const Address &p_target, const Address &p_source, const GDScriptDataType &p_type) = 0;
 	virtual void write_call(const Address &p_target, const Address &p_base, const StringName &p_function_name, const Vector<Address> &p_arguments) = 0;
 	virtual void write_super_call(const Address &p_target, const StringName &p_function_name, const Vector<Address> &p_arguments) = 0;
diff --git a/modules/gdscript/gdscript_compiler.cpp b/modules/gdscript/gdscript_compiler.cpp
index 6a91148575..abbca899bd 100644
--- a/modules/gdscript/gdscript_compiler.cpp
+++ b/modules/gdscript/gdscript_compiler.cpp
@@ -262,7 +262,7 @@ GDScriptCodeGenerator::Address GDScriptCompiler::_parse_expression(CodeGen &code
 					GDScriptNativeClass *nc = nullptr;
 					while (scr) {
 						if (scr->constants.has(identifier)) {
-							return GDScriptCodeGenerator::Address(GDScriptCodeGenerator::Address::CLASS_CONSTANT, gen->add_or_get_name(identifier)); // TODO: Get type here.
+							return codegen.add_constant(scr->constants[identifier]); // TODO: Get type here.
 						}
 						if (scr->native.is_valid()) {
 							nc = scr->native.ptr();
@@ -319,7 +319,8 @@ GDScriptCodeGenerator::Address GDScriptCompiler::_parse_expression(CodeGen &code
 
 			if (GDScriptLanguage::get_singleton()->get_global_map().has(identifier)) {
 				int idx = GDScriptLanguage::get_singleton()->get_global_map()[identifier];
-				return GDScriptCodeGenerator::Address(GDScriptCodeGenerator::Address::GLOBAL, idx); // TODO: Get type.
+				Variant global = GDScriptLanguage::get_singleton()->get_global_array()[idx];
+				return codegen.add_constant(global); // TODO: Get type.
 			}
 
 			// Try global classes.
@@ -347,7 +348,9 @@ GDScriptCodeGenerator::Address GDScriptCompiler::_parse_expression(CodeGen &code
 
 #ifdef TOOLS_ENABLED
 			if (GDScriptLanguage::get_singleton()->get_named_globals_map().has(identifier)) {
-				return GDScriptCodeGenerator::Address(GDScriptCodeGenerator::Address::NAMED_GLOBAL, gen->add_or_get_name(identifier)); // TODO: Get type.
+				GDScriptCodeGenerator::Address global = codegen.add_temporary(); // TODO: Get type.
+				gen->write_store_named_global(global, identifier);
+				return global;
 			}
 #endif
 
diff --git a/modules/gdscript/gdscript_compiler.h b/modules/gdscript/gdscript_compiler.h
index 651391f972..1b0beec0d4 100644
--- a/modules/gdscript/gdscript_compiler.h
+++ b/modules/gdscript/gdscript_compiler.h
@@ -61,7 +61,7 @@ class GDScriptCompiler {
 
 		GDScriptCodeGenerator::Address add_local_constant(const StringName &p_name, const Variant &p_value) {
 			uint32_t addr = generator->add_local_constant(p_name, p_value);
-			locals[p_name] = GDScriptCodeGenerator::Address(GDScriptCodeGenerator::Address::LOCAL_CONSTANT, addr);
+			locals[p_name] = GDScriptCodeGenerator::Address(GDScriptCodeGenerator::Address::CONSTANT, addr);
 			return locals[p_name];
 		}
 
diff --git a/modules/gdscript/gdscript_disassembler.cpp b/modules/gdscript/gdscript_disassembler.cpp
index 33acbb2a35..74da0ee232 100644
--- a/modules/gdscript/gdscript_disassembler.cpp
+++ b/modules/gdscript/gdscript_disassembler.cpp
@@ -69,35 +69,23 @@ static String _disassemble_address(const GDScript *p_script, const GDScriptFunct
 	int addr = p_address & GDScriptFunction::ADDR_MASK;
 
 	switch (p_address >> GDScriptFunction::ADDR_BITS) {
-		case GDScriptFunction::ADDR_TYPE_SELF: {
-			return "self";
-		} break;
-		case GDScriptFunction::ADDR_TYPE_CLASS: {
-			return "class";
-		} break;
 		case GDScriptFunction::ADDR_TYPE_MEMBER: {
 			return "member(" + p_script->debug_get_member_by_index(addr) + ")";
 		} break;
-		case GDScriptFunction::ADDR_TYPE_CLASS_CONSTANT: {
-			return "class_const(" + p_function.get_global_name(addr) + ")";
-		} break;
-		case GDScriptFunction::ADDR_TYPE_LOCAL_CONSTANT: {
+		case GDScriptFunction::ADDR_TYPE_CONSTANT: {
 			return "const(" + _get_variant_string(p_function.get_constant(addr)) + ")";
 		} break;
 		case GDScriptFunction::ADDR_TYPE_STACK: {
-			return "stack(" + itos(addr) + ")";
-		} break;
-		case GDScriptFunction::ADDR_TYPE_STACK_VARIABLE: {
-			return "var_stack(" + itos(addr) + ")";
-		} break;
-		case GDScriptFunction::ADDR_TYPE_GLOBAL: {
-			return "global(" + _get_variant_string(GDScriptLanguage::get_singleton()->get_global_array()[addr]) + ")";
-		} break;
-		case GDScriptFunction::ADDR_TYPE_NAMED_GLOBAL: {
-			return "named_global(" + p_function.get_global_name(addr) + ")";
-		} break;
-		case GDScriptFunction::ADDR_TYPE_NIL: {
-			return "nil";
+			switch (addr) {
+				case GDScriptFunction::ADDR_STACK_SELF:
+					return "self";
+				case GDScriptFunction::ADDR_STACK_CLASS:
+					return "class";
+				case GDScriptFunction::ADDR_STACK_NIL:
+					return "nil";
+				default:
+					return "stack(" + itos(addr) + ")";
+			}
 		} break;
 	}
 
@@ -885,6 +873,14 @@ void GDScriptFunction::disassemble(const Vector<String> &p_code_lines) const {
 				incr += 5;
 			} break;
 				DISASSEMBLE_ITERATE_TYPES(DISASSEMBLE_ITERATE);
+			case OPCODE_STORE_NAMED_GLOBAL: {
+				text += "store named global ";
+				text += DADDR(1);
+				text += " = ";
+				text += String(_global_names_ptr[_code_ptr[ip + 2]]);
+
+				incr += 3;
+			} break;
 			case OPCODE_LINE: {
 				int line = _code_ptr[ip + 1] - 1;
 				if (line >= 0 && line < p_code_lines.size()) {
diff --git a/modules/gdscript/gdscript_function.h b/modules/gdscript/gdscript_function.h
index 9fc75b66ce..414dfab2e7 100644
--- a/modules/gdscript/gdscript_function.h
+++ b/modules/gdscript/gdscript_function.h
@@ -350,6 +350,7 @@ public:
 		OPCODE_ITERATE_PACKED_VECTOR3_ARRAY,
 		OPCODE_ITERATE_PACKED_COLOR_ARRAY,
 		OPCODE_ITERATE_OBJECT,
+		OPCODE_STORE_NAMED_GLOBAL,
 		OPCODE_ASSERT,
 		OPCODE_BREAKPOINT,
 		OPCODE_LINE,
@@ -360,16 +361,18 @@ public:
 		ADDR_BITS = 24,
 		ADDR_MASK = ((1 << ADDR_BITS) - 1),
 		ADDR_TYPE_MASK = ~ADDR_MASK,
-		ADDR_TYPE_SELF = 0,
-		ADDR_TYPE_CLASS = 1,
+		ADDR_TYPE_STACK = 0,
+		ADDR_TYPE_CONSTANT = 1,
 		ADDR_TYPE_MEMBER = 2,
-		ADDR_TYPE_CLASS_CONSTANT = 3,
-		ADDR_TYPE_LOCAL_CONSTANT = 4,
-		ADDR_TYPE_STACK = 5,
-		ADDR_TYPE_STACK_VARIABLE = 6,
-		ADDR_TYPE_GLOBAL = 7,
-		ADDR_TYPE_NAMED_GLOBAL = 8,
-		ADDR_TYPE_NIL = 9
+	};
+
+	enum FixedAddresses {
+		ADDR_STACK_SELF = 0,
+		ADDR_STACK_CLASS = 1,
+		ADDR_STACK_NIL = 2,
+		ADDR_SELF = ADDR_STACK_SELF | (ADDR_TYPE_STACK << ADDR_BITS),
+		ADDR_CLASS = ADDR_STACK_CLASS | (ADDR_TYPE_STACK << ADDR_BITS),
+		ADDR_NIL = ADDR_STACK_NIL | (ADDR_TYPE_STACK << ADDR_BITS),
 	};
 
 	enum Instruction {
@@ -462,7 +465,7 @@ private:
 
 	List<StackDebug> stack_debug;
 
-	_FORCE_INLINE_ Variant *_get_variant(int p_address, GDScriptInstance *p_instance, GDScript *p_script, Variant &self, Variant &static_ref, Variant *p_stack, String &r_error) const;
+	_FORCE_INLINE_ Variant *_get_variant(int p_address, GDScriptInstance *p_instance, Variant *p_stack, String &r_error) const;
 	_FORCE_INLINE_ String _get_call_error(const Callable::CallError &p_err, const String &p_where, const Variant **argptrs) const;
 
 	friend class GDScriptLanguage;
@@ -497,7 +500,6 @@ public:
 #endif
 		Vector<uint8_t> stack;
 		int stack_size = 0;
-		Variant self;
 		uint32_t alloca_size = 0;
 		int ip = 0;
 		int line = 0;
diff --git a/modules/gdscript/gdscript_vm.cpp b/modules/gdscript/gdscript_vm.cpp
index 6b7da4a467..8bf6a8b08b 100644
--- a/modules/gdscript/gdscript_vm.cpp
+++ b/modules/gdscript/gdscript_vm.cpp
@@ -34,22 +34,22 @@
 #include "core/os/os.h"
 #include "gdscript.h"
 
-Variant *GDScriptFunction::_get_variant(int p_address, GDScriptInstance *p_instance, GDScript *p_script, Variant &self, Variant &static_ref, Variant *p_stack, String &r_error) const {
+Variant *GDScriptFunction::_get_variant(int p_address, GDScriptInstance *p_instance, Variant *p_stack, String &r_error) const {
 	int address = p_address & ADDR_MASK;
 
 	//sequential table (jump table generated by compiler)
 	switch ((p_address & ADDR_TYPE_MASK) >> ADDR_BITS) {
-		case ADDR_TYPE_SELF: {
+		case ADDR_TYPE_STACK: {
 #ifdef DEBUG_ENABLED
-			if (unlikely(!p_instance)) {
-				r_error = "Cannot access self without instance.";
-				return nullptr;
-			}
+			ERR_FAIL_INDEX_V(address, _stack_size, nullptr);
 #endif
-			return &self;
+			return &p_stack[address];
 		} break;
-		case ADDR_TYPE_CLASS: {
-			return &static_ref;
+		case ADDR_TYPE_CONSTANT: {
+#ifdef DEBUG_ENABLED
+			ERR_FAIL_INDEX_V(address, _constant_count, nullptr);
+#endif
+			return &_constants_ptr[address];
 		} break;
 		case ADDR_TYPE_MEMBER: {
 #ifdef DEBUG_ENABLED
@@ -61,65 +61,6 @@ Variant *GDScriptFunction::_get_variant(int p_address, GDScriptInstance *p_insta
 			//member indexing is O(1)
 			return &p_instance->members.write[address];
 		} break;
-		case ADDR_TYPE_CLASS_CONSTANT: {
-			//todo change to index!
-			GDScript *s = p_script;
-#ifdef DEBUG_ENABLED
-			ERR_FAIL_INDEX_V(address, _global_names_count, nullptr);
-#endif
-			const StringName *sn = &_global_names_ptr[address];
-
-			while (s) {
-				GDScript *o = s;
-				while (o) {
-					Map<StringName, Variant>::Element *E = o->constants.find(*sn);
-					if (E) {
-						return &E->get();
-					}
-					o = o->_owner;
-				}
-				s = s->_base;
-			}
-
-			ERR_FAIL_V_MSG(nullptr, "GDScriptCompiler bug.");
-		} break;
-		case ADDR_TYPE_LOCAL_CONSTANT: {
-#ifdef DEBUG_ENABLED
-			ERR_FAIL_INDEX_V(address, _constant_count, nullptr);
-#endif
-			return &_constants_ptr[address];
-		} break;
-		case ADDR_TYPE_STACK:
-		case ADDR_TYPE_STACK_VARIABLE: {
-#ifdef DEBUG_ENABLED
-			ERR_FAIL_INDEX_V(address, _stack_size, nullptr);
-#endif
-			return &p_stack[address];
-		} break;
-		case ADDR_TYPE_GLOBAL: {
-#ifdef DEBUG_ENABLED
-			ERR_FAIL_INDEX_V(address, GDScriptLanguage::get_singleton()->get_global_array_size(), nullptr);
-#endif
-			return &GDScriptLanguage::get_singleton()->get_global_array()[address];
-		} break;
-#ifdef TOOLS_ENABLED
-		case ADDR_TYPE_NAMED_GLOBAL: {
-#ifdef DEBUG_ENABLED
-			ERR_FAIL_INDEX_V(address, _global_names_count, nullptr);
-#endif
-			StringName id = _global_names_ptr[address];
-
-			if (GDScriptLanguage::get_singleton()->get_named_globals_map().has(id)) {
-				return (Variant *)&GDScriptLanguage::get_singleton()->get_named_globals_map()[id];
-			} else {
-				r_error = "Autoload singleton '" + String(id) + "' has been removed.";
-				return nullptr;
-			}
-		} break;
-#endif
-		case ADDR_TYPE_NIL: {
-			return &nil;
-		} break;
 	}
 
 	ERR_FAIL_V_MSG(nullptr, "Bad code! (unknown addressing mode).");
@@ -340,6 +281,7 @@ String GDScriptFunction::_get_call_error(const Callable::CallError &p_err, const
 		&&OPCODE_ITERATE_PACKED_VECTOR3_ARRAY,       \
 		&&OPCODE_ITERATE_PACKED_COLOR_ARRAY,         \
 		&&OPCODE_ITERATE_OBJECT,                     \
+		&&OPCODE_STORE_NAMED_GLOBAL,                 \
 		&&OPCODE_ASSERT,                             \
 		&&OPCODE_BREAKPOINT,                         \
 		&&OPCODE_LINE,                               \
@@ -415,11 +357,9 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 
 	r_err.error = Callable::CallError::CALL_OK;
 
-	Variant self;
-	Variant static_ref;
 	Variant retvalue;
 	Variant *stack = nullptr;
-	Variant **instruction_args;
+	Variant **instruction_args = nullptr;
 	const void **call_args_ptr = nullptr;
 	int defarg = 0;
 
@@ -444,7 +384,6 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 		script = p_state->script;
 		p_instance = p_state->instance;
 		defarg = p_state->defarg;
-		self = p_state->self;
 
 	} else {
 		if (p_argcount != _argument_count) {
@@ -462,55 +401,49 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 			}
 		}
 
-		alloca_size = sizeof(Variant *) * _instruction_args_size + sizeof(Variant) * _stack_size;
-
-		if (alloca_size) {
-			uint8_t *aptr = (uint8_t *)alloca(alloca_size);
+		// Add 3 here for self, class, and nil.
+		alloca_size = sizeof(Variant *) * 3 + sizeof(Variant *) * _instruction_args_size + sizeof(Variant) * _stack_size;
 
-			if (_stack_size) {
-				stack = (Variant *)aptr;
-				for (int i = 0; i < p_argcount; i++) {
-					if (!argument_types[i].has_type) {
-						memnew_placement(&stack[i], Variant(*p_args[i]));
-						continue;
-					}
+		uint8_t *aptr = (uint8_t *)alloca(alloca_size);
+		stack = (Variant *)aptr;
 
-					if (!argument_types[i].is_type(*p_args[i], true)) {
-						r_err.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT;
-						r_err.argument = i;
-						r_err.expected = argument_types[i].kind == GDScriptDataType::BUILTIN ? argument_types[i].builtin_type : Variant::OBJECT;
-						return Variant();
-					}
-					if (argument_types[i].kind == GDScriptDataType::BUILTIN) {
-						Variant arg;
-						Variant::construct(argument_types[i].builtin_type, arg, &p_args[i], 1, r_err);
-						memnew_placement(&stack[i], Variant(arg));
-					} else {
-						memnew_placement(&stack[i], Variant(*p_args[i]));
-					}
-				}
-				for (int i = p_argcount; i < _stack_size; i++) {
-					memnew_placement(&stack[i], Variant);
-				}
-			} else {
-				stack = nullptr;
+		for (int i = 0; i < p_argcount; i++) {
+			if (!argument_types[i].has_type) {
+				memnew_placement(&stack[i + 3], Variant(*p_args[i]));
+				continue;
 			}
 
-			if (_instruction_args_size) {
-				instruction_args = (Variant **)&aptr[sizeof(Variant) * _stack_size];
+			if (!argument_types[i].is_type(*p_args[i], true)) {
+				r_err.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT;
+				r_err.argument = i;
+				r_err.expected = argument_types[i].kind == GDScriptDataType::BUILTIN ? argument_types[i].builtin_type : Variant::OBJECT;
+				return Variant();
+			}
+			if (argument_types[i].kind == GDScriptDataType::BUILTIN) {
+				Variant arg;
+				Variant::construct(argument_types[i].builtin_type, arg, &p_args[i], 1, r_err);
+				memnew_placement(&stack[i + 3], Variant(arg));
 			} else {
-				instruction_args = nullptr;
+				memnew_placement(&stack[i + 3], Variant(*p_args[i]));
 			}
+		}
+		for (int i = p_argcount + 3; i < _stack_size; i++) {
+			memnew_placement(&stack[i], Variant);
+		}
 
+		memnew_placement(&stack[ADDR_STACK_NIL], Variant);
+
+		if (_instruction_args_size) {
+			instruction_args = (Variant **)&aptr[sizeof(Variant) * _stack_size];
 		} else {
-			stack = nullptr;
 			instruction_args = nullptr;
 		}
 
 		if (p_instance) {
-			self = p_instance->owner;
+			memnew_placement(&stack[ADDR_STACK_SELF], Variant(p_instance->owner));
 			script = p_instance->script.ptr();
 		} else {
+			memnew_placement(&stack[ADDR_STACK_SELF], Variant);
 			script = _script;
 		}
 	}
@@ -520,7 +453,7 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 		call_args_ptr = nullptr;
 	}
 
-	static_ref = script;
+	memnew_placement(&stack[ADDR_STACK_CLASS], Variant(script));
 
 	String err_text;
 
@@ -541,10 +474,10 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 #define CHECK_SPACE(m_space) \
 	GD_ERR_BREAK((ip + m_space) > _code_size)
 
-#define GET_VARIANT_PTR(m_v, m_code_ofs)                                                                   \
-	Variant *m_v;                                                                                          \
-	m_v = _get_variant(_code_ptr[ip + m_code_ofs], p_instance, script, self, static_ref, stack, err_text); \
-	if (unlikely(!m_v))                                                                                    \
+#define GET_VARIANT_PTR(m_v, m_code_ofs)                                         \
+	Variant *m_v;                                                                \
+	m_v = _get_variant(_code_ptr[ip + m_code_ofs], p_instance, stack, err_text); \
+	if (unlikely(!m_v))                                                          \
 		OPCODE_BREAK;
 
 #else
@@ -552,7 +485,7 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 #define CHECK_SPACE(m_space)
 #define GET_VARIANT_PTR(m_v, m_code_ofs) \
 	Variant *m_v;                        \
-	m_v = _get_variant(_code_ptr[ip + m_code_ofs], p_instance, script, self, static_ref, stack, err_text);
+	m_v = _get_variant(_code_ptr[ip + m_code_ofs], p_instance, stack, err_text);
 
 #endif
 
@@ -2038,7 +1971,6 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 						memnew_placement(&gdfs->state.stack.write[sizeof(Variant) * i], Variant(stack[i]));
 					}
 					gdfs->state.stack_size = _stack_size;
-					gdfs->state.self = self;
 					gdfs->state.alloca_size = alloca_size;
 					gdfs->state.ip = ip + 2;
 					gdfs->state.line = line;
@@ -3028,6 +2960,19 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 			}
 			DISPATCH_OPCODE;
 
+			OPCODE(OPCODE_STORE_NAMED_GLOBAL) {
+				CHECK_SPACE(3);
+				int globalname_idx = _code_ptr[ip + 2];
+				GD_ERR_BREAK(globalname_idx < 0 || globalname_idx >= _global_names_count);
+				const StringName *globalname = &_global_names_ptr[globalname_idx];
+
+				GET_INSTRUCTION_ARG(dst, 0);
+				*dst = GDScriptLanguage::get_singleton()->get_named_globals_map()[*globalname];
+
+				ip += 3;
+			}
+			DISPATCH_OPCODE;
+
 			OPCODE(OPCODE_ASSERT) {
 				CHECK_SPACE(3);
 
diff --git a/modules/gltf/gltf_document.cpp b/modules/gltf/gltf_document.cpp
index 0b70175a24..c7a2779655 100644
--- a/modules/gltf/gltf_document.cpp
+++ b/modules/gltf/gltf_document.cpp
@@ -3293,6 +3293,7 @@ Error GLTFDocument::_serialize_materials(Ref<GLTFState> state) {
 				}
 				img->decompress();
 				img->convert(Image::FORMAT_RGBA8);
+				img->convert_ra_rgba8_to_rg();
 				for (int32_t y = 0; y < img->get_height(); y++) {
 					for (int32_t x = 0; x < img->get_width(); x++) {
 						Color c = img->get_pixel(x, y);
diff --git a/modules/squish/image_compress_squish.cpp b/modules/squish/image_compress_squish.cpp
index cce08034df..fb0c7aba1d 100644
--- a/modules/squish/image_compress_squish.cpp
+++ b/modules/squish/image_compress_squish.cpp
@@ -76,83 +76,3 @@ void image_decompress_squish(Image *p_image) {
 		p_image->convert_ra_rgba8_to_rg();
 	}
 }
-
-void image_compress_squish(Image *p_image, float p_lossy_quality, Image::UsedChannels p_channels) {
-	if (p_image->get_format() >= Image::FORMAT_DXT1) {
-		return; //do not compress, already compressed
-	}
-
-	int w = p_image->get_width();
-	int h = p_image->get_height();
-
-	if (p_image->get_format() <= Image::FORMAT_RGBA8) {
-		int squish_comp = squish::kColourRangeFit;
-
-		if (p_lossy_quality > 0.85) {
-			squish_comp = squish::kColourIterativeClusterFit;
-		} else if (p_lossy_quality > 0.75) {
-			squish_comp = squish::kColourClusterFit;
-		}
-
-		Image::Format target_format = Image::FORMAT_RGBA8;
-
-		p_image->convert(Image::FORMAT_RGBA8); //still uses RGBA to convert
-
-		switch (p_channels) {
-			case Image::USED_CHANNELS_L: {
-				target_format = Image::FORMAT_DXT1;
-				squish_comp |= squish::kDxt1;
-			} break;
-			case Image::USED_CHANNELS_LA: {
-				target_format = Image::FORMAT_DXT5;
-				squish_comp |= squish::kDxt5;
-			} break;
-			case Image::USED_CHANNELS_R: {
-				target_format = Image::FORMAT_RGTC_R;
-				squish_comp |= squish::kBc4;
-			} break;
-			case Image::USED_CHANNELS_RG: {
-				target_format = Image::FORMAT_RGTC_RG;
-				squish_comp |= squish::kBc5;
-			} break;
-			case Image::USED_CHANNELS_RGB: {
-				target_format = Image::FORMAT_DXT1;
-				squish_comp |= squish::kDxt1;
-			} break;
-			case Image::USED_CHANNELS_RGBA: {
-				//TODO, should convert both, then measure which one does a better job
-				target_format = Image::FORMAT_DXT5;
-				squish_comp |= squish::kDxt5;
-
-			} break;
-			default: {
-				ERR_PRINT("Unknown image format, defaulting to RGBA8");
-				break;
-			}
-		}
-
-		Vector<uint8_t> data;
-		int target_size = Image::get_image_data_size(w, h, target_format, p_image->has_mipmaps());
-		int mm_count = p_image->has_mipmaps() ? Image::get_image_required_mipmaps(w, h, target_format) : 0;
-		data.resize(target_size);
-		int shift = Image::get_format_pixel_rshift(target_format);
-
-		const uint8_t *rb = p_image->get_data().ptr();
-		uint8_t *wb = data.ptrw();
-
-		int dst_ofs = 0;
-
-		for (int i = 0; i <= mm_count; i++) {
-			int bw = w % 4 != 0 ? w + (4 - w % 4) : w;
-			int bh = h % 4 != 0 ? h + (4 - h % 4) : h;
-
-			int src_ofs = p_image->get_mipmap_offset(i);
-			squish::CompressImage(&rb[src_ofs], w, h, &wb[dst_ofs], squish_comp);
-			dst_ofs += (MAX(4, bw) * MAX(4, bh)) >> shift;
-			w = MAX(w / 2, 1);
-			h = MAX(h / 2, 1);
-		}
-
-		p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
-	}
-}
diff --git a/modules/squish/image_compress_squish.h b/modules/squish/image_compress_squish.h
index 301d30fcf1..ebc5a41887 100644
--- a/modules/squish/image_compress_squish.h
+++ b/modules/squish/image_compress_squish.h
@@ -33,7 +33,6 @@
 
 #include "core/io/image.h"
 
-void image_compress_squish(Image *p_image, float p_lossy_quality, Image::UsedChannels p_channels);
 void image_decompress_squish(Image *p_image);
 
 #endif // IMAGE_COMPRESS_SQUISH_H
diff --git a/modules/squish/register_types.cpp b/modules/squish/register_types.cpp
index 451e9d8e93..c51cdc9521 100644
--- a/modules/squish/register_types.cpp
+++ b/modules/squish/register_types.cpp
@@ -32,7 +32,6 @@
 #include "image_compress_squish.h"
 
 void register_squish_types() {
-	Image::set_compress_bc_func(image_compress_squish);
 	Image::_image_decompress_bc = image_decompress_squish;
 }
 
diff --git a/platform/android/export/export.cpp b/platform/android/export/export.cpp
index 5f9f420b59..7e3bdfe3b4 100644
--- a/platform/android/export/export.cpp
+++ b/platform/android/export/export.cpp
@@ -201,8 +201,10 @@ static const char *android_perms[] = {
 	nullptr
 };
 
-static const char *SPLASH_IMAGE_EXPORT_PATH = "res/drawable/splash.png";
-static const char *SPLASH_BG_COLOR_PATH = "res/drawable/splash_bg_color.png";
+static const char *SPLASH_IMAGE_EXPORT_PATH = "res/drawable-nodpi/splash.png";
+static const char *LEGACY_BUILD_SPLASH_IMAGE_EXPORT_PATH = "res/drawable-nodpi-v4/splash.png";
+static const char *SPLASH_BG_COLOR_PATH = "res/drawable-nodpi/splash_bg_color.png";
+static const char *LEGACY_BUILD_SPLASH_BG_COLOR_PATH = "res/drawable-nodpi-v4/splash_bg_color.png";
 static const char *SPLASH_CONFIG_PATH = "res://android/build/res/drawable/splash_drawable.xml";
 
 const String SPLASH_CONFIG_XML_CONTENT = R"SPLASH(<?xml version="1.0" encoding="utf-8"?>
@@ -210,7 +212,7 @@ const String SPLASH_CONFIG_XML_CONTENT = R"SPLASH(<?xml version="1.0" encoding="
 	<item android:drawable="@drawable/splash_bg_color" />
 	<item>
 		<bitmap
-				android:gravity="%s"
+				android:gravity="center"
 				android:filter="%s"
 				android:src="@drawable/splash" />
 	</item>
@@ -1502,6 +1504,21 @@ class EditorExportPlatformAndroid : public EditorExportPlatform {
 			splash_image = Ref<Image>(memnew(Image(boot_splash_png)));
 		}
 
+		if (scale_splash) {
+			Size2 screen_size = Size2(ProjectSettings::get_singleton()->get("display/window/size/width"), ProjectSettings::get_singleton()->get("display/window/size/height"));
+			int width, height;
+			if (screen_size.width > screen_size.height) {
+				// scale horizontally
+				height = screen_size.height;
+				width = splash_image->get_width() * screen_size.height / splash_image->get_height();
+			} else {
+				// scale vertically
+				width = screen_size.width;
+				height = splash_image->get_height() * screen_size.width / splash_image->get_width();
+			}
+			splash_image->resize(width, height);
+		}
+
 		// Setup the splash bg color
 		bool bg_color_valid;
 		Color bg_color = ProjectSettings::get_singleton()->get("application/boot_splash/bg_color", &bg_color_valid);
@@ -1514,8 +1531,7 @@ class EditorExportPlatformAndroid : public EditorExportPlatform {
 		splash_bg_color_image->create(splash_image->get_width(), splash_image->get_height(), false, splash_image->get_format());
 		splash_bg_color_image->fill(bg_color);
 
-		String gravity = scale_splash ? "fill" : "center";
-		String processed_splash_config_xml = vformat(SPLASH_CONFIG_XML_CONTENT, gravity, bool_to_string(apply_filter));
+		String processed_splash_config_xml = vformat(SPLASH_CONFIG_XML_CONTENT, bool_to_string(apply_filter));
 		return processed_splash_config_xml;
 	}
 
@@ -2702,12 +2718,12 @@ public:
 			}
 
 			// Process the splash image
-			if (file == SPLASH_IMAGE_EXPORT_PATH && splash_image.is_valid() && !splash_image->is_empty()) {
+			if ((file == SPLASH_IMAGE_EXPORT_PATH || file == LEGACY_BUILD_SPLASH_IMAGE_EXPORT_PATH) && splash_image.is_valid() && !splash_image->is_empty()) {
 				_load_image_data(splash_image, data);
 			}
 
 			// Process the splash bg color image
-			if (file == SPLASH_BG_COLOR_PATH && splash_bg_color_image.is_valid() && !splash_bg_color_image->is_empty()) {
+			if ((file == SPLASH_BG_COLOR_PATH || file == LEGACY_BUILD_SPLASH_BG_COLOR_PATH) && splash_bg_color_image.is_valid() && !splash_bg_color_image->is_empty()) {
 				_load_image_data(splash_bg_color_image, data);
 			}
 
diff --git a/platform/android/java/app/config.gradle b/platform/android/java/app/config.gradle
index c0ae4007d2..ad9a19e2af 100644
--- a/platform/android/java/app/config.gradle
+++ b/platform/android/java/app/config.gradle
@@ -239,5 +239,5 @@ ext.shouldSign = { ->
 }
 
 ext.shouldNotStrip = { ->
-    return isAndroidStudio()
+    return isAndroidStudio() || project.hasProperty("doNotStrip")
 }
diff --git a/platform/android/java/app/res/drawable/splash.png b/platform/android/java/app/res/drawable-nodpi/splash.png
index 7bddd4325a..7bddd4325a 100644
--- a/platform/android/java/app/res/drawable/splash.png
+++ b/platform/android/java/app/res/drawable-nodpi/splash.png
diff --git a/platform/android/java/app/res/drawable/splash_bg_color.png b/platform/android/java/app/res/drawable-nodpi/splash_bg_color.png
index 004b6fd508..004b6fd508 100644
--- a/platform/android/java/app/res/drawable/splash_bg_color.png
+++ b/platform/android/java/app/res/drawable-nodpi/splash_bg_color.png
diff --git a/platform/android/java/build.gradle b/platform/android/java/build.gradle
index ec02b0fc7a..81570d9d86 100644
--- a/platform/android/java/build.gradle
+++ b/platform/android/java/build.gradle
@@ -122,16 +122,17 @@ task zipCustomBuild(type: Zip) {
     destinationDir(file(binDir))
 }
 
-/**
- * Master task used to coordinate the tasks defined above to generate the set of Godot templates.
- */
-task generateGodotTemplates(type: GradleBuild) {
+def templateExcludedBuildTask() {
     // We exclude these gradle tasks so we can run the scons command manually.
+    def excludedTasks = []
     for (String buildType : supportedTargets) {
-        startParameter.excludedTaskNames += ":lib:" + getSconsTaskName(buildType)
+        excludedTasks += ":lib:" + getSconsTaskName(buildType)
     }
+    return excludedTasks
+}
 
-    tasks = []
+def templateBuildTasks() {
+    def tasks = []
 
     // Only build the apks and aar files for which we have native shared libraries.
     for (String target : supportedTargets) {
@@ -152,6 +153,29 @@ task generateGodotTemplates(type: GradleBuild) {
         }
     }
 
+    return tasks
+}
+
+/**
+ * Master task used to coordinate the tasks defined above to generate the set of Godot templates.
+ */
+task generateGodotTemplates(type: GradleBuild) {
+    startParameter.excludedTaskNames = templateExcludedBuildTask()
+    tasks = templateBuildTasks()
+
+    finalizedBy 'zipCustomBuild'
+}
+
+/**
+ * Generates the same output as generateGodotTemplates but with dev symbols
+ */
+task generateDevTemplate (type: GradleBuild) {
+    // add parameter to set symbols to true
+    startParameter.projectProperties += [doNotStrip: true]
+
+    startParameter.excludedTaskNames = templateExcludedBuildTask()
+    tasks = templateBuildTasks()
+
     finalizedBy 'zipCustomBuild'
 }
 
diff --git a/platform/android/java/gradle.properties b/platform/android/java/gradle.properties
index 2dc069ad2f..6b3b62a9da 100644
--- a/platform/android/java/gradle.properties
+++ b/platform/android/java/gradle.properties
@@ -12,7 +12,7 @@ android.useAndroidX=true
 
 # Specifies the JVM arguments used for the daemon process.
 # The setting is particularly useful for tweaking memory settings.
-org.gradle.jvmargs=-Xmx1536m
+org.gradle.jvmargs=-Xmx4536m
 
 # When configured, Gradle will run in incubating parallel mode.
 # This option should only be used with decoupled projects. More details, visit
diff --git a/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java b/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java
index ec2ace4821..1ed16e04ca 100644
--- a/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java
+++ b/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java
@@ -63,30 +63,27 @@ public abstract class FullScreenGodotApp extends FragmentActivity implements God
 
 	@Override
 	public void onNewIntent(Intent intent) {
+		super.onNewIntent(intent);
 		if (godotFragment != null) {
 			godotFragment.onNewIntent(intent);
-		} else {
-			super.onNewIntent(intent);
 		}
 	}
 
 	@CallSuper
 	@Override
 	public void onActivityResult(int requestCode, int resultCode, Intent data) {
+		super.onActivityResult(requestCode, resultCode, data);
 		if (godotFragment != null) {
 			godotFragment.onActivityResult(requestCode, resultCode, data);
-		} else {
-			super.onActivityResult(requestCode, resultCode, data);
 		}
 	}
 
 	@CallSuper
 	@Override
 	public void onRequestPermissionsResult(int requestCode, String[] permissions, int[] grantResults) {
+		super.onRequestPermissionsResult(requestCode, permissions, grantResults);
 		if (godotFragment != null) {
 			godotFragment.onRequestPermissionsResult(requestCode, permissions, grantResults);
-		} else {
-			super.onRequestPermissionsResult(requestCode, permissions, grantResults);
 		}
 	}
 
diff --git a/platform/iphone/plugin/godot_plugin_config.h b/platform/iphone/plugin/godot_plugin_config.h
index f4e30c8349..e2546e733c 100644
--- a/platform/iphone/plugin/godot_plugin_config.h
+++ b/platform/iphone/plugin/godot_plugin_config.h
@@ -218,8 +218,9 @@ static inline uint64_t get_plugin_modification_time(const PluginConfigIOS &plugi
 	} else {
 		String file_path = plugin_config.binary.get_base_dir();
 		String file_name = plugin_config.binary.get_basename().get_file();
-		String release_file_name = file_path.plus_file(file_name + ".release.a");
-		String debug_file_name = file_path.plus_file(file_name + ".debug.a");
+		String plugin_extension = plugin_config.binary.get_extension();
+		String release_file_name = file_path.plus_file(file_name + ".release." + plugin_extension);
+		String debug_file_name = file_path.plus_file(file_name + ".debug." + plugin_extension);
 
 		last_updated = MAX(last_updated, FileAccess::get_modified_time(release_file_name));
 		last_updated = MAX(last_updated, FileAccess::get_modified_time(debug_file_name));
diff --git a/scene/2d/animated_sprite_2d.cpp b/scene/2d/animated_sprite_2d.cpp
index 4aa079b013..9ee37670d1 100644
--- a/scene/2d/animated_sprite_2d.cpp
+++ b/scene/2d/animated_sprite_2d.cpp
@@ -272,7 +272,7 @@ void AnimatedSprite2D::set_sprite_frames(const Ref<SpriteFrames> &p_frames) {
 	notify_property_list_changed();
 	_reset_timeout();
 	update();
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<SpriteFrames> AnimatedSprite2D::get_sprite_frames() const {
@@ -440,17 +440,14 @@ StringName AnimatedSprite2D::get_animation() const {
 	return animation;
 }
 
-String AnimatedSprite2D::get_configuration_warning() const {
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> AnimatedSprite2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (frames.is_null()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("A SpriteFrames resource must be created or set in the \"Frames\" property in order for AnimatedSprite to display frames.");
+		warnings.push_back(TTR("A SpriteFrames resource must be created or set in the \"Frames\" property in order for AnimatedSprite to display frames."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void AnimatedSprite2D::_bind_methods() {
diff --git a/scene/2d/animated_sprite_2d.h b/scene/2d/animated_sprite_2d.h
index 14ecb18866..ef0027edf1 100644
--- a/scene/2d/animated_sprite_2d.h
+++ b/scene/2d/animated_sprite_2d.h
@@ -109,7 +109,7 @@ public:
 	void set_flip_v(bool p_flip);
 	bool is_flipped_v() const;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 	AnimatedSprite2D();
 };
 
diff --git a/scene/2d/canvas_modulate.cpp b/scene/2d/canvas_modulate.cpp
index 5d5aaae505..52eabefbcb 100644
--- a/scene/2d/canvas_modulate.cpp
+++ b/scene/2d/canvas_modulate.cpp
@@ -51,7 +51,7 @@ void CanvasModulate::_notification(int p_what) {
 			remove_from_group("_canvas_modulate_" + itos(get_canvas().get_id()));
 		}
 
-		update_configuration_warning();
+		update_configuration_warnings();
 	}
 }
 
@@ -73,24 +73,19 @@ Color CanvasModulate::get_color() const {
 	return color;
 }
 
-String CanvasModulate::get_configuration_warning() const {
-	if (!is_visible_in_tree() || !is_inside_tree()) {
-		return String();
-	}
-
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> CanvasModulate::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
-	List<Node *> nodes;
-	get_tree()->get_nodes_in_group("_canvas_modulate_" + itos(get_canvas().get_id()), &nodes);
+	if (is_visible_in_tree() && is_inside_tree()) {
+		List<Node *> nodes;
+		get_tree()->get_nodes_in_group("_canvas_modulate_" + itos(get_canvas().get_id()), &nodes);
 
-	if (nodes.size() > 1) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
+		if (nodes.size() > 1) {
+			warnings.push_back(TTR("Only one visible CanvasModulate is allowed per scene (or set of instanced scenes). The first created one will work, while the rest will be ignored."));
 		}
-		warning += TTR("Only one visible CanvasModulate is allowed per scene (or set of instanced scenes). The first created one will work, while the rest will be ignored.");
 	}
 
-	return warning;
+	return warnings;
 }
 
 CanvasModulate::CanvasModulate() {
diff --git a/scene/2d/canvas_modulate.h b/scene/2d/canvas_modulate.h
index 4d55a5d9cb..3d85a92a11 100644
--- a/scene/2d/canvas_modulate.h
+++ b/scene/2d/canvas_modulate.h
@@ -46,7 +46,7 @@ public:
 	void set_color(const Color &p_color);
 	Color get_color() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	CanvasModulate();
 	~CanvasModulate();
diff --git a/scene/2d/collision_object_2d.cpp b/scene/2d/collision_object_2d.cpp
index c83ed36917..30728a2755 100644
--- a/scene/2d/collision_object_2d.cpp
+++ b/scene/2d/collision_object_2d.cpp
@@ -363,17 +363,14 @@ void CollisionObject2D::_update_pickable() {
 	}
 }
 
-String CollisionObject2D::get_configuration_warning() const {
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> CollisionObject2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (shapes.is_empty()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("This node has no shape, so it can't collide or interact with other objects.\nConsider adding a CollisionShape2D or CollisionPolygon2D as a child to define its shape.");
+		warnings.push_back(TTR("This node has no shape, so it can't collide or interact with other objects.\nConsider adding a CollisionShape2D or CollisionPolygon2D as a child to define its shape."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void CollisionObject2D::_bind_methods() {
diff --git a/scene/2d/collision_object_2d.h b/scene/2d/collision_object_2d.h
index e82b61d441..7a1fd23e72 100644
--- a/scene/2d/collision_object_2d.h
+++ b/scene/2d/collision_object_2d.h
@@ -107,7 +107,7 @@ public:
 	void set_pickable(bool p_enabled);
 	bool is_pickable() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	_FORCE_INLINE_ RID get_rid() const { return rid; }
 
diff --git a/scene/2d/collision_polygon_2d.cpp b/scene/2d/collision_polygon_2d.cpp
index 38198c496e..a69ef73a54 100644
--- a/scene/2d/collision_polygon_2d.cpp
+++ b/scene/2d/collision_polygon_2d.cpp
@@ -204,7 +204,7 @@ void CollisionPolygon2D::set_polygon(const Vector<Point2> &p_polygon) {
 		_update_in_shape_owner();
 	}
 	update();
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Vector<Point2> CollisionPolygon2D::get_polygon() const {
@@ -219,7 +219,7 @@ void CollisionPolygon2D::set_build_mode(BuildMode p_mode) {
 		_update_in_shape_owner();
 	}
 	update();
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 CollisionPolygon2D::BuildMode CollisionPolygon2D::get_build_mode() const {
@@ -240,40 +240,28 @@ bool CollisionPolygon2D::_edit_is_selected_on_click(const Point2 &p_point, doubl
 }
 #endif
 
-String CollisionPolygon2D::get_configuration_warning() const {
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> CollisionPolygon2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!Object::cast_to<CollisionObject2D>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("CollisionPolygon2D only serves to provide a collision shape to a CollisionObject2D derived node. Please only use it as a child of Area2D, StaticBody2D, RigidBody2D, KinematicBody2D, etc. to give them a shape.");
+		warnings.push_back(TTR("CollisionPolygon2D only serves to provide a collision shape to a CollisionObject2D derived node. Please only use it as a child of Area2D, StaticBody2D, RigidBody2D, KinematicBody2D, etc. to give them a shape."));
 	}
 
 	int polygon_count = polygon.size();
 	if (polygon_count == 0) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("An empty CollisionPolygon2D has no effect on collision.");
+		warnings.push_back(TTR("An empty CollisionPolygon2D has no effect on collision."));
 	} else {
 		bool solids = build_mode == BUILD_SOLIDS;
 		if (solids) {
 			if (polygon_count < 3) {
-				if (!warning.is_empty()) {
-					warning += "\n\n";
-				}
-				warning += TTR("Invalid polygon. At least 3 points are needed in 'Solids' build mode.");
+				warnings.push_back(TTR("Invalid polygon. At least 3 points are needed in 'Solids' build mode."));
 			}
 		} else if (polygon_count < 2) {
-			if (!warning.is_empty()) {
-				warning += "\n\n";
-			}
-			warning += TTR("Invalid polygon. At least 2 points are needed in 'Segments' build mode.");
+			warnings.push_back(TTR("Invalid polygon. At least 2 points are needed in 'Segments' build mode."));
 		}
 	}
 
-	return warning;
+	return warnings;
 }
 
 void CollisionPolygon2D::set_disabled(bool p_disabled) {
diff --git a/scene/2d/collision_polygon_2d.h b/scene/2d/collision_polygon_2d.h
index 9df9802629..95dd8c9e21 100644
--- a/scene/2d/collision_polygon_2d.h
+++ b/scene/2d/collision_polygon_2d.h
@@ -78,7 +78,7 @@ public:
 	void set_polygon(const Vector<Point2> &p_polygon);
 	Vector<Point2> get_polygon() const;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	void set_disabled(bool p_disabled);
 	bool is_disabled() const;
diff --git a/scene/2d/collision_shape_2d.cpp b/scene/2d/collision_shape_2d.cpp
index 93949f741b..d9009ef85c 100644
--- a/scene/2d/collision_shape_2d.cpp
+++ b/scene/2d/collision_shape_2d.cpp
@@ -162,7 +162,7 @@ void CollisionShape2D::set_shape(const Ref<Shape2D> &p_shape) {
 		shape->connect("changed", callable_mp(this, &CollisionShape2D::_shape_changed));
 	}
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<Shape2D> CollisionShape2D::get_shape() const {
@@ -177,19 +177,23 @@ bool CollisionShape2D::_edit_is_selected_on_click(const Point2 &p_point, double
 	return shape->_edit_is_selected_on_click(p_point, p_tolerance);
 }
 
-String CollisionShape2D::get_configuration_warning() const {
+TypedArray<String> CollisionShape2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
+
 	if (!Object::cast_to<CollisionObject2D>(get_parent())) {
-		return TTR("CollisionShape2D only serves to provide a collision shape to a CollisionObject2D derived node. Please only use it as a child of Area2D, StaticBody2D, RigidBody2D, KinematicBody2D, etc. to give them a shape.");
+		warnings.push_back(TTR("CollisionShape2D only serves to provide a collision shape to a CollisionObject2D derived node. Please only use it as a child of Area2D, StaticBody2D, RigidBody2D, KinematicBody2D, etc. to give them a shape."));
 	}
 	if (!shape.is_valid()) {
-		return TTR("A shape must be provided for CollisionShape2D to function. Please create a shape resource for it!");
+		warnings.push_back(TTR("A shape must be provided for CollisionShape2D to function. Please create a shape resource for it!"));
 	}
+
 	Ref<ConvexPolygonShape2D> convex = shape;
 	Ref<ConcavePolygonShape2D> concave = shape;
 	if (convex.is_valid() || concave.is_valid()) {
-		return TTR("Polygon-based shapes are not meant be used nor edited directly through the CollisionShape2D node. Please use the CollisionPolygon2D node instead.");
+		warnings.push_back(TTR("Polygon-based shapes are not meant be used nor edited directly through the CollisionShape2D node. Please use the CollisionPolygon2D node instead."));
 	}
-	return String();
+
+	return warnings;
 }
 
 void CollisionShape2D::set_disabled(bool p_disabled) {
diff --git a/scene/2d/collision_shape_2d.h b/scene/2d/collision_shape_2d.h
index 695d0c6657..eaf72627c8 100644
--- a/scene/2d/collision_shape_2d.h
+++ b/scene/2d/collision_shape_2d.h
@@ -72,7 +72,7 @@ public:
 	void set_one_way_collision_margin(real_t p_margin);
 	real_t get_one_way_collision_margin() const;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	CollisionShape2D();
 };
diff --git a/scene/2d/cpu_particles_2d.cpp b/scene/2d/cpu_particles_2d.cpp
index 6a69a4c618..5f2efeb8ca 100644
--- a/scene/2d/cpu_particles_2d.cpp
+++ b/scene/2d/cpu_particles_2d.cpp
@@ -244,18 +244,15 @@ bool CPUParticles2D::get_fractional_delta() const {
 	return fractional_delta;
 }
 
-String CPUParticles2D::get_configuration_warning() const {
-	String warnings = Node2D::get_configuration_warning();
+TypedArray<String> CPUParticles2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	CanvasItemMaterial *mat = Object::cast_to<CanvasItemMaterial>(get_material().ptr());
 
 	if (get_material().is_null() || (mat && !mat->get_particles_animation())) {
 		if (get_param(PARAM_ANIM_SPEED) != 0.0 || get_param(PARAM_ANIM_OFFSET) != 0.0 ||
 				get_param_curve(PARAM_ANIM_SPEED).is_valid() || get_param_curve(PARAM_ANIM_OFFSET).is_valid()) {
-			if (warnings != String()) {
-				warnings += "\n";
-			}
-			warnings += "- " + TTR("CPUParticles2D animation requires the usage of a CanvasItemMaterial with \"Particles Animation\" enabled.");
+			warnings.push_back(TTR("CPUParticles2D animation requires the usage of a CanvasItemMaterial with \"Particles Animation\" enabled."));
 		}
 	}
 
diff --git a/scene/2d/cpu_particles_2d.h b/scene/2d/cpu_particles_2d.h
index ab04ee4a57..ba34a0f45d 100644
--- a/scene/2d/cpu_particles_2d.h
+++ b/scene/2d/cpu_particles_2d.h
@@ -275,7 +275,7 @@ public:
 	void set_gravity(const Vector2 &p_gravity);
 	Vector2 get_gravity() const;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	void restart();
 
diff --git a/scene/2d/gpu_particles_2d.cpp b/scene/2d/gpu_particles_2d.cpp
index af70c47f7c..8a0631a614 100644
--- a/scene/2d/gpu_particles_2d.cpp
+++ b/scene/2d/gpu_particles_2d.cpp
@@ -137,7 +137,7 @@ void GPUParticles2D::set_process_material(const Ref<Material> &p_material) {
 	}
 	RS::get_singleton()->particles_set_process_material(particles, material_rid);
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 void GPUParticles2D::set_speed_scale(float p_scale) {
@@ -216,18 +216,15 @@ bool GPUParticles2D::get_fractional_delta() const {
 	return fractional_delta;
 }
 
-String GPUParticles2D::get_configuration_warning() const {
+TypedArray<String> GPUParticles2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
+
 	if (RenderingServer::get_singleton()->is_low_end()) {
-		return TTR("GPU-based particles are not supported by the GLES2 video driver.\nUse the CPUParticles2D node instead. You can use the \"Convert to CPUParticles2D\" option for this purpose.");
+		warnings.push_back(TTR("GPU-based particles are not supported by the GLES2 video driver.\nUse the CPUParticles2D node instead. You can use the \"Convert to CPUParticles2D\" option for this purpose."));
 	}
 
-	String warnings = Node2D::get_configuration_warning();
-
 	if (process_material.is_null()) {
-		if (warnings != String()) {
-			warnings += "\n";
-		}
-		warnings += "- " + TTR("A material to process the particles is not assigned, so no behavior is imprinted.");
+		warnings.push_back(TTR("A material to process the particles is not assigned, so no behavior is imprinted."));
 	} else {
 		CanvasItemMaterial *mat = Object::cast_to<CanvasItemMaterial>(get_material().ptr());
 
@@ -236,10 +233,7 @@ String GPUParticles2D::get_configuration_warning() const {
 			if (process &&
 					(process->get_param(ParticlesMaterial::PARAM_ANIM_SPEED) != 0.0 || process->get_param(ParticlesMaterial::PARAM_ANIM_OFFSET) != 0.0 ||
 							process->get_param_texture(ParticlesMaterial::PARAM_ANIM_SPEED).is_valid() || process->get_param_texture(ParticlesMaterial::PARAM_ANIM_OFFSET).is_valid())) {
-				if (warnings != String()) {
-					warnings += "\n";
-				}
-				warnings += "- " + TTR("Particles2D animation requires the usage of a CanvasItemMaterial with \"Particles Animation\" enabled.");
+				warnings.push_back(TTR("Particles2D animation requires the usage of a CanvasItemMaterial with \"Particles Animation\" enabled."));
 			}
 		}
 	}
diff --git a/scene/2d/gpu_particles_2d.h b/scene/2d/gpu_particles_2d.h
index 774cef9cc9..20f9f768ed 100644
--- a/scene/2d/gpu_particles_2d.h
+++ b/scene/2d/gpu_particles_2d.h
@@ -110,7 +110,7 @@ public:
 	void set_texture(const Ref<Texture2D> &p_texture);
 	Ref<Texture2D> get_texture() const;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	void restart();
 	Rect2 capture_rect() const;
diff --git a/scene/2d/joints_2d.cpp b/scene/2d/joints_2d.cpp
index 7d9cdd52ac..8a4ccc2f96 100644
--- a/scene/2d/joints_2d.cpp
+++ b/scene/2d/joints_2d.cpp
@@ -66,6 +66,7 @@ void Joint2D::_update_joint(bool p_only_free) {
 	if (p_only_free || !is_inside_tree()) {
 		PhysicsServer2D::get_singleton()->joint_clear(joint);
 		warning = String();
+		update_configuration_warnings();
 		return;
 	}
 
@@ -76,43 +77,26 @@ void Joint2D::_update_joint(bool p_only_free) {
 	PhysicsBody2D *body_b = Object::cast_to<PhysicsBody2D>(node_b);
 
 	if (node_a && !body_a && node_b && !body_b) {
-		PhysicsServer2D::get_singleton()->joint_clear(joint);
 		warning = TTR("Node A and Node B must be PhysicsBody2Ds");
-		update_configuration_warning();
-		return;
-	}
-
-	if (node_a && !body_a) {
-		PhysicsServer2D::get_singleton()->joint_clear(joint);
+	} else if (node_a && !body_a) {
 		warning = TTR("Node A must be a PhysicsBody2D");
-		update_configuration_warning();
-		return;
-	}
-
-	if (node_b && !body_b) {
-		PhysicsServer2D::get_singleton()->joint_clear(joint);
+	} else if (node_b && !body_b) {
 		warning = TTR("Node B must be a PhysicsBody2D");
-		update_configuration_warning();
-		return;
-	}
-
-	if (!body_a || !body_b) {
-		PhysicsServer2D::get_singleton()->joint_clear(joint);
+	} else if (!body_a || !body_b) {
 		warning = TTR("Joint is not connected to two PhysicsBody2Ds");
-		update_configuration_warning();
-		return;
+	} else if (body_a == body_b) {
+		warning = TTR("Node A and Node B must be different PhysicsBody2Ds");
+	} else {
+		warning = String();
 	}
 
-	if (body_a == body_b) {
+	update_configuration_warnings();
+
+	if (!warning.is_empty()) {
 		PhysicsServer2D::get_singleton()->joint_clear(joint);
-		warning = TTR("Node A and Node B must be different PhysicsBody2Ds");
-		update_configuration_warning();
 		return;
 	}
 
-	warning = String();
-	update_configuration_warning();
-
 	if (body_a) {
 		body_a->force_update_transform();
 	}
@@ -211,17 +195,14 @@ bool Joint2D::get_exclude_nodes_from_collision() const {
 	return exclude_from_collision;
 }
 
-String Joint2D::get_configuration_warning() const {
-	String node_warning = Node2D::get_configuration_warning();
+TypedArray<String> Joint2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node2D::get_configuration_warnings();
 
 	if (!warning.is_empty()) {
-		if (!node_warning.is_empty()) {
-			node_warning += "\n\n";
-		}
-		node_warning += warning;
+		warnings.push_back(warning);
 	}
 
-	return node_warning;
+	return warnings;
 }
 
 void Joint2D::_bind_methods() {
diff --git a/scene/2d/joints_2d.h b/scene/2d/joints_2d.h
index 08e02ee29d..dc5a08f815 100644
--- a/scene/2d/joints_2d.h
+++ b/scene/2d/joints_2d.h
@@ -62,7 +62,7 @@ protected:
 	_FORCE_INLINE_ bool is_configured() const { return configured; }
 
 public:
-	virtual String get_configuration_warning() const override;
+	virtual TypedArray<String> get_configuration_warnings() const override;
 
 	void set_node_a(const NodePath &p_node_a);
 	NodePath get_node_a() const;
diff --git a/scene/2d/light_2d.cpp b/scene/2d/light_2d.cpp
index 99e35cad1d..8fb765f16b 100644
--- a/scene/2d/light_2d.cpp
+++ b/scene/2d/light_2d.cpp
@@ -373,7 +373,7 @@ void PointLight2D::set_texture(const Ref<Texture2D> &p_texture) {
 		RS::get_singleton()->canvas_light_set_texture(_get_light(), RID());
 	}
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<Texture2D> PointLight2D::get_texture() const {
@@ -390,17 +390,14 @@ Vector2 PointLight2D::get_texture_offset() const {
 	return texture_offset;
 }
 
-String PointLight2D::get_configuration_warning() const {
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> PointLight2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!texture.is_valid()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("A texture with the shape of the light must be supplied to the \"Texture\" property.");
+		warnings.push_back(TTR("A texture with the shape of the light must be supplied to the \"Texture\" property."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void PointLight2D::set_texture_scale(real_t p_scale) {
diff --git a/scene/2d/light_2d.h b/scene/2d/light_2d.h
index ae6cf6d0a0..d9ecd81f1c 100644
--- a/scene/2d/light_2d.h
+++ b/scene/2d/light_2d.h
@@ -169,7 +169,7 @@ public:
 	void set_texture_scale(real_t p_scale);
 	real_t get_texture_scale() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	PointLight2D();
 };
diff --git a/scene/2d/light_occluder_2d.cpp b/scene/2d/light_occluder_2d.cpp
index 9589702e2e..fdc28f81c2 100644
--- a/scene/2d/light_occluder_2d.cpp
+++ b/scene/2d/light_occluder_2d.cpp
@@ -242,24 +242,18 @@ int LightOccluder2D::get_occluder_light_mask() const {
 	return mask;
 }
 
-String LightOccluder2D::get_configuration_warning() const {
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> LightOccluder2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!occluder_polygon.is_valid()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("An occluder polygon must be set (or drawn) for this occluder to take effect.");
+		warnings.push_back(TTR("An occluder polygon must be set (or drawn) for this occluder to take effect."));
 	}
 
 	if (occluder_polygon.is_valid() && occluder_polygon->get_polygon().size() == 0) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("The occluder polygon for this occluder is empty. Please draw a polygon.");
+		warnings.push_back(TTR("The occluder polygon for this occluder is empty. Please draw a polygon."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void LightOccluder2D::set_as_sdf_collision(bool p_enable) {
diff --git a/scene/2d/light_occluder_2d.h b/scene/2d/light_occluder_2d.h
index f567c6d965..b4a48d1062 100644
--- a/scene/2d/light_occluder_2d.h
+++ b/scene/2d/light_occluder_2d.h
@@ -106,7 +106,7 @@ public:
 	void set_as_sdf_collision(bool p_enable);
 	bool is_set_as_sdf_collision() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	LightOccluder2D();
 	~LightOccluder2D();
diff --git a/scene/2d/navigation_agent_2d.cpp b/scene/2d/navigation_agent_2d.cpp
index 064fcc91a4..a18687afed 100644
--- a/scene/2d/navigation_agent_2d.cpp
+++ b/scene/2d/navigation_agent_2d.cpp
@@ -239,17 +239,14 @@ void NavigationAgent2D::_avoidance_done(Vector3 p_new_velocity) {
 	emit_signal("velocity_computed", velocity);
 }
 
-String NavigationAgent2D::get_configuration_warning() const {
-	String warning = Node::get_configuration_warning();
+TypedArray<String> NavigationAgent2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!Object::cast_to<Node2D>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("The NavigationAgent2D can be used only under a Node2D node");
+		warnings.push_back(TTR("The NavigationAgent2D can be used only under a Node2D node"));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void NavigationAgent2D::update_navigation() {
diff --git a/scene/2d/navigation_agent_2d.h b/scene/2d/navigation_agent_2d.h
index 153ede8cec..138ba3bc64 100644
--- a/scene/2d/navigation_agent_2d.h
+++ b/scene/2d/navigation_agent_2d.h
@@ -136,7 +136,7 @@ public:
 	void set_velocity(Vector2 p_velocity);
 	void _avoidance_done(Vector3 p_new_velocity);
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 private:
 	void update_navigation();
diff --git a/scene/2d/navigation_obstacle_2d.cpp b/scene/2d/navigation_obstacle_2d.cpp
index 965e2b6dc1..a06f7a9fd0 100644
--- a/scene/2d/navigation_obstacle_2d.cpp
+++ b/scene/2d/navigation_obstacle_2d.cpp
@@ -69,17 +69,14 @@ NavigationObstacle2D::~NavigationObstacle2D() {
 	agent = RID(); // Pointless
 }
 
-String NavigationObstacle2D::get_configuration_warning() const {
-	String warning = Node::get_configuration_warning();
+TypedArray<String> NavigationObstacle2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!Object::cast_to<Node2D>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("The NavigationObstacle2D only serves to provide collision avoidance to a Node2D object.");
+		warnings.push_back(TTR("The NavigationObstacle2D only serves to provide collision avoidance to a Node2D object."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void NavigationObstacle2D::update_agent_shape() {
diff --git a/scene/2d/navigation_obstacle_2d.h b/scene/2d/navigation_obstacle_2d.h
index 135ca4651e..9cffc2c0c3 100644
--- a/scene/2d/navigation_obstacle_2d.h
+++ b/scene/2d/navigation_obstacle_2d.h
@@ -52,7 +52,7 @@ public:
 		return agent;
 	}
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 private:
 	void update_agent_shape();
diff --git a/scene/2d/navigation_region_2d.cpp b/scene/2d/navigation_region_2d.cpp
index 8be8c8db4a..d2caf5bea8 100644
--- a/scene/2d/navigation_region_2d.cpp
+++ b/scene/2d/navigation_region_2d.cpp
@@ -491,7 +491,7 @@ void NavigationRegion2D::set_navigation_polygon(const Ref<NavigationPolygon> &p_
 	}
 	_navpoly_changed();
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<NavigationPolygon> NavigationRegion2D::get_navigation_polygon() const {
@@ -509,21 +509,16 @@ void NavigationRegion2D::_map_changed(RID p_map) {
 	}
 }
 
-String NavigationRegion2D::get_configuration_warning() const {
-	if (!is_visible_in_tree() || !is_inside_tree()) {
-		return String();
-	}
-
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> NavigationRegion2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node2D::get_configuration_warnings();
 
-	if (!navpoly.is_valid()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
+	if (is_visible_in_tree() && is_inside_tree()) {
+		if (!navpoly.is_valid()) {
+			warnings.push_back(TTR("A NavigationMesh resource must be set or created for this node to work. Please set a property or draw a polygon."));
 		}
-		warning += TTR("A NavigationPolygon resource must be set or created for this node to work. Please set a property or draw a polygon.");
 	}
 
-	return warning;
+	return warnings;
 }
 
 void NavigationRegion2D::_bind_methods() {
diff --git a/scene/2d/navigation_region_2d.h b/scene/2d/navigation_region_2d.h
index 58f04599be..2db8d70791 100644
--- a/scene/2d/navigation_region_2d.h
+++ b/scene/2d/navigation_region_2d.h
@@ -120,7 +120,7 @@ public:
 	void set_navigation_polygon(const Ref<NavigationPolygon> &p_navpoly);
 	Ref<NavigationPolygon> get_navigation_polygon() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	NavigationRegion2D();
 	~NavigationRegion2D();
diff --git a/scene/2d/parallax_layer.cpp b/scene/2d/parallax_layer.cpp
index 725e858a43..228020d383 100644
--- a/scene/2d/parallax_layer.cpp
+++ b/scene/2d/parallax_layer.cpp
@@ -135,17 +135,14 @@ void ParallaxLayer::set_base_offset_and_scale(const Point2 &p_offset, real_t p_s
 	_update_mirroring();
 }
 
-String ParallaxLayer::get_configuration_warning() const {
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> ParallaxLayer::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!Object::cast_to<ParallaxBackground>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("ParallaxLayer node only works when set as child of a ParallaxBackground node.");
+		warnings.push_back(TTR("ParallaxLayer node only works when set as child of a ParallaxBackground node."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void ParallaxLayer::_bind_methods() {
diff --git a/scene/2d/parallax_layer.h b/scene/2d/parallax_layer.h
index e826e6da9c..cc2d2e096e 100644
--- a/scene/2d/parallax_layer.h
+++ b/scene/2d/parallax_layer.h
@@ -61,7 +61,7 @@ public:
 
 	void set_base_offset_and_scale(const Point2 &p_offset, real_t p_scale, const Point2 &p_screen_offset);
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 	ParallaxLayer();
 };
 
diff --git a/scene/2d/path_2d.cpp b/scene/2d/path_2d.cpp
index be160ee1dd..9912612c4f 100644
--- a/scene/2d/path_2d.cpp
+++ b/scene/2d/path_2d.cpp
@@ -249,21 +249,16 @@ void PathFollow2D::_validate_property(PropertyInfo &property) const {
 	}
 }
 
-String PathFollow2D::get_configuration_warning() const {
-	if (!is_visible_in_tree() || !is_inside_tree()) {
-		return String();
-	}
-
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> PathFollow2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
-	if (!Object::cast_to<Path2D>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
+	if (is_visible_in_tree() && is_inside_tree()) {
+		if (!Object::cast_to<Path2D>(get_parent())) {
+			warnings.push_back(TTR("PathFollow2D only works when set as a child of a Path2D node."));
 		}
-		warning += TTR("PathFollow2D only works when set as a child of a Path2D node.");
 	}
 
-	return warning;
+	return warnings;
 }
 
 void PathFollow2D::_bind_methods() {
diff --git a/scene/2d/path_2d.h b/scene/2d/path_2d.h
index 671ab493c3..3b12f025fc 100644
--- a/scene/2d/path_2d.h
+++ b/scene/2d/path_2d.h
@@ -105,7 +105,7 @@ public:
 	void set_cubic_interpolation(bool p_enable);
 	bool get_cubic_interpolation() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	PathFollow2D() {}
 };
diff --git a/scene/2d/physics_body_2d.cpp b/scene/2d/physics_body_2d.cpp
index a615d96687..830834c964 100644
--- a/scene/2d/physics_body_2d.cpp
+++ b/scene/2d/physics_body_2d.cpp
@@ -708,26 +708,23 @@ void RigidBody2D::_notification(int p_what) {
 
 	if (p_what == NOTIFICATION_LOCAL_TRANSFORM_CHANGED) {
 		if (Engine::get_singleton()->is_editor_hint()) {
-			update_configuration_warning();
+			update_configuration_warnings();
 		}
 	}
 
 #endif
 }
 
-String RigidBody2D::get_configuration_warning() const {
+TypedArray<String> RigidBody2D::get_configuration_warnings() const {
 	Transform2D t = get_transform();
 
-	String warning = CollisionObject2D::get_configuration_warning();
+	TypedArray<String> warnings = CollisionObject2D::get_configuration_warnings();
 
 	if ((get_mode() == MODE_RIGID || get_mode() == MODE_CHARACTER) && (ABS(t.elements[0].length() - 1.0) > 0.05 || ABS(t.elements[1].length() - 1.0) > 0.05)) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Size changes to RigidBody2D (in character or rigid modes) will be overridden by the physics engine when running.\nChange the size in children collision shapes instead.");
+		warnings.push_back(TTR("Size changes to RigidBody2D (in character or rigid modes) will be overridden by the physics engine when running.\nChange the size in children collision shapes instead."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void RigidBody2D::_bind_methods() {
diff --git a/scene/2d/physics_body_2d.h b/scene/2d/physics_body_2d.h
index 2dc853b23b..aeec662e5c 100644
--- a/scene/2d/physics_body_2d.h
+++ b/scene/2d/physics_body_2d.h
@@ -246,7 +246,7 @@ public:
 
 	TypedArray<Node2D> get_colliding_bodies() const; //function for script
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	RigidBody2D();
 	~RigidBody2D();
diff --git a/scene/2d/remote_transform_2d.cpp b/scene/2d/remote_transform_2d.cpp
index f10714e28a..a7613dc009 100644
--- a/scene/2d/remote_transform_2d.cpp
+++ b/scene/2d/remote_transform_2d.cpp
@@ -138,7 +138,7 @@ void RemoteTransform2D::set_remote_node(const NodePath &p_remote_node) {
 		_update_remote();
 	}
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 NodePath RemoteTransform2D::get_remote_node() const {
@@ -185,17 +185,14 @@ void RemoteTransform2D::force_update_cache() {
 	_update_cache();
 }
 
-String RemoteTransform2D::get_configuration_warning() const {
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> RemoteTransform2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!has_node(remote_node) || !Object::cast_to<Node2D>(get_node(remote_node))) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Path property must point to a valid Node2D node to work.");
+		warnings.push_back(TTR("Path property must point to a valid Node2D node to work."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void RemoteTransform2D::_bind_methods() {
diff --git a/scene/2d/remote_transform_2d.h b/scene/2d/remote_transform_2d.h
index 4a26d7b339..36fddb58c7 100644
--- a/scene/2d/remote_transform_2d.h
+++ b/scene/2d/remote_transform_2d.h
@@ -70,7 +70,7 @@ public:
 
 	void force_update_cache();
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	RemoteTransform2D();
 };
diff --git a/scene/2d/skeleton_2d.cpp b/scene/2d/skeleton_2d.cpp
index 2d19d254b1..22180797f0 100644
--- a/scene/2d/skeleton_2d.cpp
+++ b/scene/2d/skeleton_2d.cpp
@@ -100,7 +100,7 @@ void Bone2D::set_rest(const Transform2D &p_rest) {
 		skeleton->_make_bone_setup_dirty();
 	}
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Transform2D Bone2D::get_rest() const {
@@ -133,27 +133,21 @@ int Bone2D::get_index_in_skeleton() const {
 	return skeleton_index;
 }
 
-String Bone2D::get_configuration_warning() const {
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> Bone2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 	if (!skeleton) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
 		if (parent_bone) {
-			warning += TTR("This Bone2D chain should end at a Skeleton2D node.");
+			warnings.push_back(TTR("This Bone2D chain should end at a Skeleton2D node."));
 		} else {
-			warning += TTR("A Bone2D only works with a Skeleton2D or another Bone2D as parent node.");
+			warnings.push_back(TTR("A Bone2D only works with a Skeleton2D or another Bone2D as parent node."));
 		}
 	}
 
 	if (rest == Transform2D(0, 0, 0, 0, 0, 0)) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("This bone lacks a proper REST pose. Go to the Skeleton2D node and set one.");
+		warnings.push_back(TTR("This bone lacks a proper REST pose. Go to the Skeleton2D node and set one."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 Bone2D::Bone2D() {
diff --git a/scene/2d/skeleton_2d.h b/scene/2d/skeleton_2d.h
index 1f43ea742b..fd62b87bde 100644
--- a/scene/2d/skeleton_2d.h
+++ b/scene/2d/skeleton_2d.h
@@ -60,7 +60,7 @@ public:
 	void apply_rest();
 	Transform2D get_skeleton_rest() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	void set_default_length(real_t p_length);
 	real_t get_default_length() const;
diff --git a/scene/2d/tile_map.cpp b/scene/2d/tile_map.cpp
index 81a5b0b28c..776d3bca5f 100644
--- a/scene/2d/tile_map.cpp
+++ b/scene/2d/tile_map.cpp
@@ -59,7 +59,7 @@ void TileMap::_notification(int p_what) {
 			RID space = get_world_2d()->get_space();
 			_update_quadrant_transform();
 			_update_quadrant_space(space);
-			update_configuration_warning();
+			update_configuration_warnings();
 
 		} break;
 
@@ -1301,7 +1301,7 @@ void TileMap::set_collision_use_parent(bool p_use_parent) {
 
 	_recreate_quadrants();
 	notify_property_list_changed();
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 void TileMap::set_collision_friction(float p_friction) {
@@ -1693,17 +1693,14 @@ void TileMap::set_texture_repeat(CanvasItem::TextureRepeat p_texture_repeat) {
 	}
 }
 
-String TileMap::get_configuration_warning() const {
-	String warning = Node2D::get_configuration_warning();
+TypedArray<String> TileMap::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (use_parent && !collision_parent) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		return TTR("TileMap with Use Parent on needs a parent CollisionObject2D to give shapes to. Please use it as a child of Area2D, StaticBody2D, RigidBody2D, KinematicBody2D, etc. to give them a shape.");
+		warnings.push_back(TTR("TileMap with Use Parent on needs a parent CollisionObject2D to give shapes to. Please use it as a child of Area2D, StaticBody2D, RigidBody2D, KinematicBody2D, etc. to give them a shape."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void TileMap::_bind_methods() {
diff --git a/scene/2d/tile_map.h b/scene/2d/tile_map.h
index 26c84a0bb9..9d27053fee 100644
--- a/scene/2d/tile_map.h
+++ b/scene/2d/tile_map.h
@@ -340,7 +340,7 @@ public:
 	void set_clip_uv(bool p_enable);
 	bool get_clip_uv() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	virtual void set_texture_filter(CanvasItem::TextureFilter p_texture_filter) override;
 
diff --git a/scene/2d/visibility_notifier_2d.cpp b/scene/2d/visibility_notifier_2d.cpp
index 916038a1f3..8feb47f1cc 100644
--- a/scene/2d/visibility_notifier_2d.cpp
+++ b/scene/2d/visibility_notifier_2d.cpp
@@ -310,18 +310,15 @@ void VisibilityEnabler2D::_node_removed(Node *p_node) {
 	nodes.erase(p_node);
 }
 
-String VisibilityEnabler2D::get_configuration_warning() const {
-	String warning = VisibilityNotifier2D::get_configuration_warning();
+TypedArray<String> VisibilityEnabler2D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 #ifdef TOOLS_ENABLED
 	if (is_inside_tree() && get_parent() && (get_parent()->get_filename() == String() && get_parent() != get_tree()->get_edited_scene_root())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("VisibilityEnabler2D works best when used with the edited scene root directly as parent.");
+		warnings.push_back(TTR("VisibilityEnabler2D works best when used with the edited scene root directly as parent."));
 	}
 #endif
-	return warning;
+	return warnings;
 }
 
 void VisibilityEnabler2D::_bind_methods() {
diff --git a/scene/2d/visibility_notifier_2d.h b/scene/2d/visibility_notifier_2d.h
index 3d1701a1e5..7f4a5bc193 100644
--- a/scene/2d/visibility_notifier_2d.h
+++ b/scene/2d/visibility_notifier_2d.h
@@ -102,7 +102,7 @@ public:
 	void set_enabler(Enabler p_enabler, bool p_enable);
 	bool is_enabler_enabled(Enabler p_enabler) const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	VisibilityEnabler2D();
 };
diff --git a/scene/3d/collision_object_3d.cpp b/scene/3d/collision_object_3d.cpp
index 39880db29c..261ff5db55 100644
--- a/scene/3d/collision_object_3d.cpp
+++ b/scene/3d/collision_object_3d.cpp
@@ -75,6 +75,11 @@ void CollisionObject3D::_notification(int p_what) {
 			}
 
 		} break;
+		case NOTIFICATION_PREDELETE: {
+			if (debug_shape_count > 0) {
+				_clear_debug_shapes();
+			}
+		} break;
 	}
 }
 
@@ -116,11 +121,13 @@ void CollisionObject3D::_update_debug_shapes() {
 	for (Set<uint32_t>::Element *shapedata_idx = debug_shapes_to_update.front(); shapedata_idx; shapedata_idx = shapedata_idx->next()) {
 		if (shapes.has(shapedata_idx->get())) {
 			ShapeData &shapedata = shapes[shapedata_idx->get()];
+			ShapeData::ShapeBase *shapes = shapedata.shapes.ptrw();
 			for (int i = 0; i < shapedata.shapes.size(); i++) {
-				ShapeData::ShapeBase &s = shapedata.shapes.write[i];
+				ShapeData::ShapeBase &s = shapes[i];
 				if (s.debug_shape) {
 					s.debug_shape->queue_delete();
 					s.debug_shape = nullptr;
+					--debug_shape_count;
 				}
 				if (s.shape.is_null() || shapedata.disabled) {
 					continue;
@@ -133,12 +140,30 @@ void CollisionObject3D::_update_debug_shapes() {
 				add_child(mi);
 				mi->force_update_transform();
 				s.debug_shape = mi;
+				++debug_shape_count;
 			}
 		}
 	}
 	debug_shapes_to_update.clear();
 }
 
+void CollisionObject3D::_clear_debug_shapes() {
+	for (Map<uint32_t, ShapeData>::Element *E = shapes.front(); E; E = E->next()) {
+		ShapeData &shapedata = E->get();
+		ShapeData::ShapeBase *shapes = shapedata.shapes.ptrw();
+		for (int i = 0; i < shapedata.shapes.size(); i++) {
+			ShapeData::ShapeBase &s = shapes[i];
+			if (s.debug_shape) {
+				s.debug_shape->queue_delete();
+				s.debug_shape = nullptr;
+				--debug_shape_count;
+			}
+		}
+	}
+
+	debug_shape_count = 0;
+}
+
 void CollisionObject3D::_update_shape_data(uint32_t p_owner) {
 	if (is_inside_tree() && get_tree()->is_debugging_collisions_hint() && !Engine::get_singleton()->is_editor_hint()) {
 		if (debug_shapes_to_update.is_empty()) {
@@ -395,17 +420,14 @@ bool CollisionObject3D::get_capture_input_on_drag() const {
 	return capture_input_on_drag;
 }
 
-String CollisionObject3D::get_configuration_warning() const {
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> CollisionObject3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (shapes.is_empty()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("This node has no shape, so it can't collide or interact with other objects.\nConsider adding a CollisionShape3D or CollisionPolygon3D as a child to define its shape.");
+		warnings.push_back(TTR("This node has no shape, so it can't collide or interact with other objects.\nConsider adding a CollisionShape3D or CollisionPolygon3D as a child to define its shape."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 CollisionObject3D::CollisionObject3D() {
diff --git a/scene/3d/collision_object_3d.h b/scene/3d/collision_object_3d.h
index fe20176984..e2f6cc7500 100644
--- a/scene/3d/collision_object_3d.h
+++ b/scene/3d/collision_object_3d.h
@@ -62,6 +62,7 @@ class CollisionObject3D : public Node3D {
 	bool ray_pickable = true;
 
 	Set<uint32_t> debug_shapes_to_update;
+	int debug_shape_count = 0;
 
 	void _update_pickable();
 
@@ -78,6 +79,7 @@ protected:
 	virtual void _mouse_exit();
 
 	void _update_debug_shapes();
+	void _clear_debug_shapes();
 
 public:
 	uint32_t create_shape_owner(Object *p_owner);
@@ -110,7 +112,7 @@ public:
 
 	_FORCE_INLINE_ RID get_rid() const { return rid; }
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	CollisionObject3D();
 	~CollisionObject3D();
diff --git a/scene/3d/collision_polygon_3d.cpp b/scene/3d/collision_polygon_3d.cpp
index e3e2eb4669..ac715b22b2 100644
--- a/scene/3d/collision_polygon_3d.cpp
+++ b/scene/3d/collision_polygon_3d.cpp
@@ -121,7 +121,7 @@ void CollisionPolygon3D::set_polygon(const Vector<Point2> &p_polygon) {
 	if (parent) {
 		_build_polygon();
 	}
-	update_configuration_warning();
+	update_configuration_warnings();
 	update_gizmo();
 }
 
@@ -167,24 +167,18 @@ void CollisionPolygon3D::set_margin(real_t p_margin) {
 	}
 }
 
-String CollisionPolygon3D::get_configuration_warning() const {
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> CollisionPolygon3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!Object::cast_to<CollisionObject3D>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("CollisionPolygon3D only serves to provide a collision shape to a CollisionObject3D derived node. Please only use it as a child of Area3D, StaticBody3D, RigidBody3D, KinematicBody3D, etc. to give them a shape.");
+		warnings.push_back(TTR("CollisionPolygon3D only serves to provide a collision shape to a CollisionObject3D derived node. Please only use it as a child of Area3D, StaticBody3D, RigidBody3D, KinematicBody3D, etc. to give them a shape."));
 	}
 
 	if (polygon.is_empty()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("An empty CollisionPolygon3D has no effect on collision.");
+		warnings.push_back(TTR("An empty CollisionPolygon3D has no effect on collision."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 bool CollisionPolygon3D::_is_editable_3d_polygon() const {
diff --git a/scene/3d/collision_polygon_3d.h b/scene/3d/collision_polygon_3d.h
index 750751b509..73b8a8e0e3 100644
--- a/scene/3d/collision_polygon_3d.h
+++ b/scene/3d/collision_polygon_3d.h
@@ -74,7 +74,7 @@ public:
 	real_t get_margin() const;
 	void set_margin(real_t p_margin);
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	CollisionPolygon3D();
 };
diff --git a/scene/3d/collision_shape_3d.cpp b/scene/3d/collision_shape_3d.cpp
index 242d82ab4c..bec87914c0 100644
--- a/scene/3d/collision_shape_3d.cpp
+++ b/scene/3d/collision_shape_3d.cpp
@@ -120,34 +120,25 @@ void CollisionShape3D::resource_changed(RES res) {
 	update_gizmo();
 }
 
-String CollisionShape3D::get_configuration_warning() const {
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> CollisionShape3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!Object::cast_to<CollisionObject3D>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("CollisionShape3D only serves to provide a collision shape to a CollisionObject3D derived node. Please only use it as a child of Area3D, StaticBody3D, RigidBody3D, KinematicBody3D, etc. to give them a shape.");
+		warnings.push_back(TTR("CollisionShape3D only serves to provide a collision shape to a CollisionObject3D derived node. Please only use it as a child of Area3D, StaticBody3D, RigidBody3D, KinematicBody3D, etc. to give them a shape."));
 	}
 
 	if (!shape.is_valid()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("A shape must be provided for CollisionShape3D to function. Please create a shape resource for it.");
+		warnings.push_back(TTR("A shape must be provided for CollisionShape3D to function. Please create a shape resource for it."));
 	}
 
 	if (shape.is_valid() &&
 			Object::cast_to<RigidBody3D>(get_parent()) &&
 			Object::cast_to<ConcavePolygonShape3D>(*shape) &&
 			Object::cast_to<RigidBody3D>(get_parent())->get_mode() != RigidBody3D::MODE_STATIC) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("ConcavePolygonShape3D doesn't support RigidBody3D in another mode than static.");
+		warnings.push_back(TTR("ConcavePolygonShape3D doesn't support RigidBody3D in another mode than static."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void CollisionShape3D::_bind_methods() {
@@ -188,7 +179,7 @@ void CollisionShape3D::set_shape(const Ref<Shape3D> &p_shape) {
 	if (is_inside_tree()) {
 		_shape_changed();
 	}
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<Shape3D> CollisionShape3D::get_shape() const {
diff --git a/scene/3d/collision_shape_3d.h b/scene/3d/collision_shape_3d.h
index 5512417f75..56a4ae3039 100644
--- a/scene/3d/collision_shape_3d.h
+++ b/scene/3d/collision_shape_3d.h
@@ -64,7 +64,7 @@ public:
 	void set_disabled(bool p_disabled);
 	bool is_disabled() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	CollisionShape3D();
 	~CollisionShape3D();
diff --git a/scene/3d/cpu_particles_3d.cpp b/scene/3d/cpu_particles_3d.cpp
index d22d7ff3ab..780773bb57 100644
--- a/scene/3d/cpu_particles_3d.cpp
+++ b/scene/3d/cpu_particles_3d.cpp
@@ -189,8 +189,8 @@ bool CPUParticles3D::get_fractional_delta() const {
 	return fractional_delta;
 }
 
-String CPUParticles3D::get_configuration_warning() const {
-	String warnings = GeometryInstance3D::get_configuration_warning();
+TypedArray<String> CPUParticles3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	bool mesh_found = false;
 	bool anim_material_found = false;
@@ -209,18 +209,12 @@ String CPUParticles3D::get_configuration_warning() const {
 	anim_material_found = anim_material_found || (spat && spat->get_billboard_mode() == StandardMaterial3D::BILLBOARD_PARTICLES);
 
 	if (!mesh_found) {
-		if (warnings != String()) {
-			warnings += "\n";
-		}
-		warnings += "- " + TTR("Nothing is visible because no mesh has been assigned.");
+		warnings.push_back(TTR("Nothing is visible because no mesh has been assigned."));
 	}
 
 	if (!anim_material_found && (get_param(PARAM_ANIM_SPEED) != 0.0 || get_param(PARAM_ANIM_OFFSET) != 0.0 ||
 										get_param_curve(PARAM_ANIM_SPEED).is_valid() || get_param_curve(PARAM_ANIM_OFFSET).is_valid())) {
-		if (warnings != String()) {
-			warnings += "\n";
-		}
-		warnings += "- " + TTR("CPUParticles3D animation requires the usage of a StandardMaterial3D whose Billboard Mode is set to \"Particle Billboard\".");
+		warnings.push_back(TTR("CPUParticles3D animation requires the usage of a StandardMaterial3D whose Billboard Mode is set to \"Particle Billboard\"."));
 	}
 
 	return warnings;
diff --git a/scene/3d/cpu_particles_3d.h b/scene/3d/cpu_particles_3d.h
index 10ac32622d..c073c93c47 100644
--- a/scene/3d/cpu_particles_3d.h
+++ b/scene/3d/cpu_particles_3d.h
@@ -280,7 +280,7 @@ public:
 	void set_gravity(const Vector3 &p_gravity);
 	Vector3 get_gravity() const;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	void restart();
 
diff --git a/scene/3d/gi_probe.cpp b/scene/3d/gi_probe.cpp
index 43f820e5d4..0da53d0101 100644
--- a/scene/3d/gi_probe.cpp
+++ b/scene/3d/gi_probe.cpp
@@ -503,19 +503,15 @@ Vector<Face3> GIProbe::get_faces(uint32_t p_usage_flags) const {
 	return Vector<Face3>();
 }
 
-String GIProbe::get_configuration_warning() const {
-	String warning = VisualInstance3D::get_configuration_warning();
+TypedArray<String> GIProbe::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (RenderingServer::get_singleton()->is_low_end()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("GIProbes are not supported by the GLES2 video driver.\nUse a BakedLightmap instead.");
+		warnings.push_back(TTR("GIProbes are not supported by the GLES2 video driver.\nUse a BakedLightmap instead."));
 	} else if (probe_data.is_null()) {
-		warning += TTR("No GIProbe data set, so this node is disabled. Bake static objects to enable GI.");
+		warnings.push_back(TTR("No GIProbe data set, so this node is disabled. Bake static objects to enable GI."));
 	}
-
-	return warning;
+	return warnings;
 }
 
 void GIProbe::_bind_methods() {
diff --git a/scene/3d/gi_probe.h b/scene/3d/gi_probe.h
index 534b425557..dac7dd3e17 100644
--- a/scene/3d/gi_probe.h
+++ b/scene/3d/gi_probe.h
@@ -165,7 +165,7 @@ public:
 	virtual AABB get_aabb() const override;
 	virtual Vector<Face3> get_faces(uint32_t p_usage_flags) const override;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	GIProbe();
 	~GIProbe();
diff --git a/scene/3d/gpu_particles_3d.cpp b/scene/3d/gpu_particles_3d.cpp
index e2cfc2ed87..a075dcf990 100644
--- a/scene/3d/gpu_particles_3d.cpp
+++ b/scene/3d/gpu_particles_3d.cpp
@@ -115,7 +115,7 @@ void GPUParticles3D::set_process_material(const Ref<Material> &p_material) {
 	}
 	RS::get_singleton()->particles_set_process_material(particles, material_rid);
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 void GPUParticles3D::set_speed_scale(float p_scale) {
@@ -208,7 +208,7 @@ void GPUParticles3D::set_draw_pass_mesh(int p_pass, const Ref<Mesh> &p_mesh) {
 
 	RS::get_singleton()->particles_set_draw_pass_mesh(particles, p_pass, mesh_rid);
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<Mesh> GPUParticles3D::get_draw_pass_mesh(int p_pass) const {
@@ -235,13 +235,13 @@ bool GPUParticles3D::get_fractional_delta() const {
 	return fractional_delta;
 }
 
-String GPUParticles3D::get_configuration_warning() const {
+TypedArray<String> GPUParticles3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
+
 	if (RenderingServer::get_singleton()->is_low_end()) {
-		return TTR("GPU-based particles are not supported by the GLES2 video driver.\nUse the CPUParticles3D node instead. You can use the \"Convert to CPUParticles3D\" option for this purpose.");
+		warnings.push_back(TTR("GPU-based particles are not supported by the GLES2 video driver.\nUse the CPUParticles3D node instead. You can use the \"Convert to CPUParticles3D\" option for this purpose."));
 	}
 
-	String warnings = GeometryInstance3D::get_configuration_warning();
-
 	bool meshes_found = false;
 	bool anim_material_found = false;
 
@@ -264,26 +264,17 @@ String GPUParticles3D::get_configuration_warning() const {
 	anim_material_found = anim_material_found || (spat && spat->get_billboard_mode() == StandardMaterial3D::BILLBOARD_PARTICLES);
 
 	if (!meshes_found) {
-		if (warnings != String()) {
-			warnings += "\n";
-		}
-		warnings += "- " + TTR("Nothing is visible because meshes have not been assigned to draw passes.");
+		warnings.push_back(TTR("Nothing is visible because meshes have not been assigned to draw passes."));
 	}
 
 	if (process_material.is_null()) {
-		if (warnings != String()) {
-			warnings += "\n";
-		}
-		warnings += "- " + TTR("A material to process the particles is not assigned, so no behavior is imprinted.");
+		warnings.push_back(TTR("A material to process the particles is not assigned, so no behavior is imprinted."));
 	} else {
 		const ParticlesMaterial *process = Object::cast_to<ParticlesMaterial>(process_material.ptr());
 		if (!anim_material_found && process &&
 				(process->get_param(ParticlesMaterial::PARAM_ANIM_SPEED) != 0.0 || process->get_param(ParticlesMaterial::PARAM_ANIM_OFFSET) != 0.0 ||
 						process->get_param_texture(ParticlesMaterial::PARAM_ANIM_SPEED).is_valid() || process->get_param_texture(ParticlesMaterial::PARAM_ANIM_OFFSET).is_valid())) {
-			if (warnings != String()) {
-				warnings += "\n";
-			}
-			warnings += "- " + TTR("Particles animation requires the usage of a StandardMaterial3D whose Billboard Mode is set to \"Particle Billboard\".");
+			warnings.push_back(TTR("Particles animation requires the usage of a StandardMaterial3D whose Billboard Mode is set to \"Particle Billboard\"."));
 		}
 	}
 
diff --git a/scene/3d/gpu_particles_3d.h b/scene/3d/gpu_particles_3d.h
index 0c1a1a510c..b9e2b5ccef 100644
--- a/scene/3d/gpu_particles_3d.h
+++ b/scene/3d/gpu_particles_3d.h
@@ -125,7 +125,7 @@ public:
 	void set_draw_pass_mesh(int p_pass, const Ref<Mesh> &p_mesh);
 	Ref<Mesh> get_draw_pass_mesh(int p_pass) const;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	void set_sub_emitter(const NodePath &p_path);
 	NodePath get_sub_emitter() const;
diff --git a/scene/3d/light_3d.cpp b/scene/3d/light_3d.cpp
index f109640aef..d45749d36b 100644
--- a/scene/3d/light_3d.cpp
+++ b/scene/3d/light_3d.cpp
@@ -48,7 +48,7 @@ void Light3D::set_param(Param p_param, float p_value) {
 		update_gizmo();
 
 		if (p_param == PARAM_SPOT_ANGLE) {
-			update_configuration_warning();
+			update_configuration_warnings();
 		}
 	}
 }
@@ -63,7 +63,7 @@ void Light3D::set_shadow(bool p_enable) {
 	RS::get_singleton()->light_set_shadow(light, p_enable);
 
 	if (type == RenderingServer::LIGHT_SPOT || type == RenderingServer::LIGHT_OMNI) {
-		update_configuration_warning();
+		update_configuration_warnings();
 	}
 
 	notify_property_list_changed();
@@ -153,7 +153,7 @@ void Light3D::set_projector(const Ref<Texture2D> &p_texture) {
 	projector = p_texture;
 	RID tex_id = projector.is_valid() ? projector->get_rid() : RID();
 	RS::get_singleton()->light_set_projector(light, tex_id);
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<Texture2D> Light3D::get_projector() const {
@@ -457,17 +457,14 @@ OmniLight3D::ShadowMode OmniLight3D::get_shadow_mode() const {
 	return shadow_mode;
 }
 
-String OmniLight3D::get_configuration_warning() const {
-	String warning = Light3D::get_configuration_warning();
+TypedArray<String> OmniLight3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!has_shadow() && get_projector().is_valid()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Projector texture only works with shadows active.");
+		warnings.push_back(TTR("Projector texture only works with shadows active."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void OmniLight3D::_bind_methods() {
@@ -491,24 +488,18 @@ OmniLight3D::OmniLight3D() :
 	set_param(PARAM_SHADOW_NORMAL_BIAS, 2.0);
 }
 
-String SpotLight3D::get_configuration_warning() const {
-	String warning = Light3D::get_configuration_warning();
+TypedArray<String> SpotLight3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (has_shadow() && get_param(PARAM_SPOT_ANGLE) >= 90.0) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("A SpotLight3D with an angle wider than 90 degrees cannot cast shadows.");
+		warnings.push_back(TTR("A SpotLight3D with an angle wider than 90 degrees cannot cast shadows."));
 	}
 
 	if (!has_shadow() && get_projector().is_valid()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Projector texture only works with shadows active.");
+		warnings.push_back(TTR("Projector texture only works with shadows active."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void SpotLight3D::_bind_methods() {
diff --git a/scene/3d/light_3d.h b/scene/3d/light_3d.h
index 311db54bce..e145b08b74 100644
--- a/scene/3d/light_3d.h
+++ b/scene/3d/light_3d.h
@@ -202,7 +202,7 @@ public:
 	void set_shadow_mode(ShadowMode p_mode);
 	ShadowMode get_shadow_mode() const;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	OmniLight3D();
 };
@@ -216,7 +216,7 @@ protected:
 	static void _bind_methods();
 
 public:
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	SpotLight3D() :
 			Light3D(RenderingServer::LIGHT_SPOT) {}
diff --git a/scene/3d/navigation_agent_3d.cpp b/scene/3d/navigation_agent_3d.cpp
index 21ca3d70dd..7346f243ba 100644
--- a/scene/3d/navigation_agent_3d.cpp
+++ b/scene/3d/navigation_agent_3d.cpp
@@ -245,17 +245,14 @@ void NavigationAgent3D::_avoidance_done(Vector3 p_new_velocity) {
 	emit_signal("velocity_computed", p_new_velocity);
 }
 
-String NavigationAgent3D::get_configuration_warning() const {
-	String warning = Node::get_configuration_warning();
+TypedArray<String> NavigationAgent3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!Object::cast_to<Node3D>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("The NavigationAgent3D can be used only under a spatial node.");
+		warnings.push_back(TTR("The NavigationAgent3D can be used only under a spatial node."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void NavigationAgent3D::update_navigation() {
diff --git a/scene/3d/navigation_agent_3d.h b/scene/3d/navigation_agent_3d.h
index 22db889618..814c0714e8 100644
--- a/scene/3d/navigation_agent_3d.h
+++ b/scene/3d/navigation_agent_3d.h
@@ -143,7 +143,7 @@ public:
 	void set_velocity(Vector3 p_velocity);
 	void _avoidance_done(Vector3 p_new_velocity);
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 private:
 	void update_navigation();
diff --git a/scene/3d/navigation_obstacle_3d.cpp b/scene/3d/navigation_obstacle_3d.cpp
index df03bca4fd..20ffc3b00e 100644
--- a/scene/3d/navigation_obstacle_3d.cpp
+++ b/scene/3d/navigation_obstacle_3d.cpp
@@ -76,17 +76,14 @@ NavigationObstacle3D::~NavigationObstacle3D() {
 	agent = RID(); // Pointless
 }
 
-String NavigationObstacle3D::get_configuration_warning() const {
-	String warning = Node::get_configuration_warning();
+TypedArray<String> NavigationObstacle3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
-	if (!parent_node3d) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("The NavigationObstacle3D only serves to provide collision avoidance to a spatial object.");
+	if (!Object::cast_to<Node3D>(get_parent())) {
+		warnings.push_back(TTR("The NavigationObstacle3D only serves to provide collision avoidance to a spatial object."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void NavigationObstacle3D::update_agent_shape() {
diff --git a/scene/3d/navigation_obstacle_3d.h b/scene/3d/navigation_obstacle_3d.h
index b1bb53724a..2f78f624a4 100644
--- a/scene/3d/navigation_obstacle_3d.h
+++ b/scene/3d/navigation_obstacle_3d.h
@@ -52,7 +52,7 @@ public:
 		return agent;
 	}
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 private:
 	void update_agent_shape();
diff --git a/scene/3d/navigation_region_3d.cpp b/scene/3d/navigation_region_3d.cpp
index 3ca704e4b8..0afad62404 100644
--- a/scene/3d/navigation_region_3d.cpp
+++ b/scene/3d/navigation_region_3d.cpp
@@ -135,7 +135,7 @@ void NavigationRegion3D::set_navigation_mesh(const Ref<NavigationMesh> &p_navmes
 	emit_signal("navigation_mesh_changed");
 
 	update_gizmo();
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<NavigationMesh> NavigationRegion3D::get_navigation_mesh() const {
@@ -177,21 +177,16 @@ void NavigationRegion3D::_bake_finished(Ref<NavigationMesh> p_nav_mesh) {
 	emit_signal("bake_finished");
 }
 
-String NavigationRegion3D::get_configuration_warning() const {
-	if (!is_visible_in_tree() || !is_inside_tree()) {
-		return String();
-	}
-
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> NavigationRegion3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
-	if (!navmesh.is_valid()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
+	if (is_visible_in_tree() && is_inside_tree()) {
+		if (!navmesh.is_valid()) {
+			warnings.push_back(TTR("A NavigationMesh resource must be set or created for this node to work."));
 		}
-		warning += TTR("A NavigationMesh resource must be set or created for this node to work.");
 	}
 
-	return warning;
+	return warnings;
 }
 
 void NavigationRegion3D::_bind_methods() {
@@ -217,7 +212,7 @@ void NavigationRegion3D::_bind_methods() {
 
 void NavigationRegion3D::_navigation_changed() {
 	update_gizmo();
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 NavigationRegion3D::NavigationRegion3D() {
diff --git a/scene/3d/navigation_region_3d.h b/scene/3d/navigation_region_3d.h
index 52fa2d6159..c2045215b1 100644
--- a/scene/3d/navigation_region_3d.h
+++ b/scene/3d/navigation_region_3d.h
@@ -66,7 +66,7 @@ public:
 	void bake_navigation_mesh();
 	void _bake_finished(Ref<NavigationMesh> p_nav_mesh);
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	NavigationRegion3D();
 	~NavigationRegion3D();
diff --git a/scene/3d/path_3d.cpp b/scene/3d/path_3d.cpp
index 7e2601902b..4ec4ee6207 100644
--- a/scene/3d/path_3d.cpp
+++ b/scene/3d/path_3d.cpp
@@ -50,7 +50,7 @@ void Path3D::_curve_changed() {
 		for (int i = 0; i < get_child_count(); i++) {
 			PathFollow3D *child = Object::cast_to<PathFollow3D>(get_child(i));
 			if (child) {
-				child->update_configuration_warning();
+				child->update_configuration_warnings();
 			}
 		}
 	}
@@ -241,29 +241,21 @@ void PathFollow3D::_validate_property(PropertyInfo &property) const {
 	}
 }
 
-String PathFollow3D::get_configuration_warning() const {
-	if (!is_visible_in_tree() || !is_inside_tree()) {
-		return String();
-	}
-
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> PathFollow3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
-	if (!Object::cast_to<Path3D>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("PathFollow3D only works when set as a child of a Path3D node.");
-	} else {
-		Path3D *path = Object::cast_to<Path3D>(get_parent());
-		if (path->get_curve().is_valid() && !path->get_curve()->is_up_vector_enabled() && rotation_mode == ROTATION_ORIENTED) {
-			if (!warning.is_empty()) {
-				warning += "\n\n";
+	if (is_visible_in_tree() && is_inside_tree()) {
+		if (!Object::cast_to<Path3D>(get_parent())) {
+			warnings.push_back(TTR("PathFollow3D only works when set as a child of a Path3D node."));
+		} else {
+			Path3D *path = Object::cast_to<Path3D>(get_parent());
+			if (path->get_curve().is_valid() && !path->get_curve()->is_up_vector_enabled() && rotation_mode == ROTATION_ORIENTED) {
+				warnings.push_back(TTR("PathFollow3D's ROTATION_ORIENTED requires \"Up Vector\" to be enabled in its parent Path3D's Curve resource."));
 			}
-			warning += TTR("PathFollow3D's ROTATION_ORIENTED requires \"Up Vector\" to be enabled in its parent Path3D's Curve resource.");
 		}
 	}
 
-	return warning;
+	return warnings;
 }
 
 void PathFollow3D::_bind_methods() {
@@ -368,7 +360,7 @@ float PathFollow3D::get_unit_offset() const {
 void PathFollow3D::set_rotation_mode(RotationMode p_rotation_mode) {
 	rotation_mode = p_rotation_mode;
 
-	update_configuration_warning();
+	update_configuration_warnings();
 	_update_transform();
 }
 
diff --git a/scene/3d/path_3d.h b/scene/3d/path_3d.h
index 17ee47593e..8545370a4a 100644
--- a/scene/3d/path_3d.h
+++ b/scene/3d/path_3d.h
@@ -104,7 +104,7 @@ public:
 	void set_cubic_interpolation(bool p_enable);
 	bool get_cubic_interpolation() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	PathFollow3D() {}
 };
diff --git a/scene/3d/physics_body_3d.cpp b/scene/3d/physics_body_3d.cpp
index e225c1f22d..2afbebdacc 100644
--- a/scene/3d/physics_body_3d.cpp
+++ b/scene/3d/physics_body_3d.cpp
@@ -444,7 +444,7 @@ void RigidBody3D::_notification(int p_what) {
 
 	if (p_what == NOTIFICATION_LOCAL_TRANSFORM_CHANGED) {
 		if (Engine::get_singleton()->is_editor_hint()) {
-			update_configuration_warning();
+			update_configuration_warnings();
 		}
 	}
 
@@ -469,7 +469,7 @@ void RigidBody3D::set_mode(Mode p_mode) {
 			PhysicsServer3D::get_singleton()->body_set_mode(get_rid(), PhysicsServer3D::BODY_MODE_KINEMATIC);
 		} break;
 	}
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 RigidBody3D::Mode RigidBody3D::get_mode() const {
@@ -709,19 +709,16 @@ Array RigidBody3D::get_colliding_bodies() const {
 	return ret;
 }
 
-String RigidBody3D::get_configuration_warning() const {
+TypedArray<String> RigidBody3D::get_configuration_warnings() const {
 	Transform t = get_transform();
 
-	String warning = CollisionObject3D::get_configuration_warning();
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if ((get_mode() == MODE_RIGID || get_mode() == MODE_CHARACTER) && (ABS(t.basis.get_axis(0).length() - 1.0) > 0.05 || ABS(t.basis.get_axis(1).length() - 1.0) > 0.05 || ABS(t.basis.get_axis(2).length() - 1.0) > 0.05)) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Size changes to RigidBody3D (in character or rigid modes) will be overridden by the physics engine when running.\nChange the size in children collision shapes instead.");
+		warnings.push_back(TTR("Size changes to RigidBody3D (in character or rigid modes) will be overridden by the physics engine when running.\nChange the size in children collision shapes instead."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void RigidBody3D::_bind_methods() {
diff --git a/scene/3d/physics_body_3d.h b/scene/3d/physics_body_3d.h
index 1450fce6a6..9515b044ab 100644
--- a/scene/3d/physics_body_3d.h
+++ b/scene/3d/physics_body_3d.h
@@ -238,7 +238,7 @@ public:
 	void apply_impulse(const Vector3 &p_impulse, const Vector3 &p_position = Vector3());
 	void apply_torque_impulse(const Vector3 &p_impulse);
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	RigidBody3D();
 	~RigidBody3D();
diff --git a/scene/3d/physics_joint_3d.cpp b/scene/3d/physics_joint_3d.cpp
index de9c75621b..3d58d1c10e 100644
--- a/scene/3d/physics_joint_3d.cpp
+++ b/scene/3d/physics_joint_3d.cpp
@@ -65,6 +65,7 @@ void Joint3D::_update_joint(bool p_only_free) {
 	if (p_only_free || !is_inside_tree()) {
 		PhysicsServer3D::get_singleton()->joint_clear(joint);
 		warning = String();
+		update_configuration_warnings();
 		return;
 	}
 
@@ -75,43 +76,26 @@ void Joint3D::_update_joint(bool p_only_free) {
 	PhysicsBody3D *body_b = Object::cast_to<PhysicsBody3D>(node_b);
 
 	if (node_a && !body_a && node_b && !body_b) {
-		PhysicsServer3D::get_singleton()->joint_clear(joint);
 		warning = TTR("Node A and Node B must be PhysicsBody3Ds");
-		update_configuration_warning();
-		return;
-	}
-
-	if (node_a && !body_a) {
-		PhysicsServer3D::get_singleton()->joint_clear(joint);
+	} else if (node_a && !body_a) {
 		warning = TTR("Node A must be a PhysicsBody3D");
-		update_configuration_warning();
-		return;
-	}
-
-	if (node_b && !body_b) {
-		PhysicsServer3D::get_singleton()->joint_clear(joint);
+	} else if (node_b && !body_b) {
 		warning = TTR("Node B must be a PhysicsBody3D");
-		update_configuration_warning();
-		return;
-	}
-
-	if (!body_a && !body_b) {
-		PhysicsServer3D::get_singleton()->joint_clear(joint);
+	} else if (!body_a && !body_b) {
 		warning = TTR("Joint is not connected to any PhysicsBody3Ds");
-		update_configuration_warning();
-		return;
+	} else if (body_a == body_b) {
+		warning = TTR("Node A and Node B must be different PhysicsBody3Ds");
+	} else {
+		warning = String();
 	}
 
-	if (body_a == body_b) {
+	update_configuration_warnings();
+
+	if (!warning.is_empty()) {
 		PhysicsServer3D::get_singleton()->joint_clear(joint);
-		warning = TTR("Node A and Node B must be different PhysicsBody3Ds");
-		update_configuration_warning();
 		return;
 	}
 
-	warning = String();
-	update_configuration_warning();
-
 	configured = true;
 
 	if (body_a) {
@@ -206,17 +190,14 @@ bool Joint3D::get_exclude_nodes_from_collision() const {
 	return exclude_from_collision;
 }
 
-String Joint3D::get_configuration_warning() const {
-	String node_warning = Node3D::get_configuration_warning();
+TypedArray<String> Joint3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node3D::get_configuration_warnings();
 
 	if (!warning.is_empty()) {
-		if (!node_warning.is_empty()) {
-			node_warning += "\n\n";
-		}
-		node_warning += warning;
+		warnings.push_back(warning);
 	}
 
-	return node_warning;
+	return warnings;
 }
 
 void Joint3D::_bind_methods() {
diff --git a/scene/3d/physics_joint_3d.h b/scene/3d/physics_joint_3d.h
index f624ba602b..3e0ea38a5c 100644
--- a/scene/3d/physics_joint_3d.h
+++ b/scene/3d/physics_joint_3d.h
@@ -63,7 +63,7 @@ protected:
 	_FORCE_INLINE_ bool is_configured() const { return configured; }
 
 public:
-	virtual String get_configuration_warning() const override;
+	virtual TypedArray<String> get_configuration_warnings() const override;
 
 	void set_node_a(const NodePath &p_node_a);
 	NodePath get_node_a() const;
diff --git a/scene/3d/remote_transform_3d.cpp b/scene/3d/remote_transform_3d.cpp
index 83ac813c53..29a407905b 100644
--- a/scene/3d/remote_transform_3d.cpp
+++ b/scene/3d/remote_transform_3d.cpp
@@ -133,7 +133,7 @@ void RemoteTransform3D::set_remote_node(const NodePath &p_remote_node) {
 		_update_remote();
 	}
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 NodePath RemoteTransform3D::get_remote_node() const {
@@ -179,17 +179,14 @@ void RemoteTransform3D::force_update_cache() {
 	_update_cache();
 }
 
-String RemoteTransform3D::get_configuration_warning() const {
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> RemoteTransform3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!has_node(remote_node) || !Object::cast_to<Node3D>(get_node(remote_node))) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("The \"Remote Path\" property must point to a valid Node3D or Node3D-derived node to work.");
+		warnings.push_back(TTR("The \"Remote Path\" property must point to a valid Node3D or Node3D-derived node to work."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void RemoteTransform3D::_bind_methods() {
diff --git a/scene/3d/remote_transform_3d.h b/scene/3d/remote_transform_3d.h
index 21005d92d1..321bd3b51e 100644
--- a/scene/3d/remote_transform_3d.h
+++ b/scene/3d/remote_transform_3d.h
@@ -70,7 +70,7 @@ public:
 
 	void force_update_cache();
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	RemoteTransform3D();
 };
diff --git a/scene/3d/soft_body_3d.cpp b/scene/3d/soft_body_3d.cpp
index 3fde4d6ef3..08e25b589e 100644
--- a/scene/3d/soft_body_3d.cpp
+++ b/scene/3d/soft_body_3d.cpp
@@ -249,7 +249,7 @@ void SoftBody3D::_softbody_changed() {
 	prepare_physics_server();
 	_reset_points_offsets();
 #ifdef TOOLS_ENABLED
-	update_configuration_warning();
+	update_configuration_warnings();
 #endif
 }
 
@@ -301,7 +301,7 @@ void SoftBody3D::_notification(int p_what) {
 
 	if (p_what == NOTIFICATION_LOCAL_TRANSFORM_CHANGED) {
 		if (Engine::get_singleton()->is_editor_hint()) {
-			update_configuration_warning();
+			update_configuration_warnings();
 		}
 	}
 
@@ -366,27 +366,19 @@ void SoftBody3D::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "ray_pickable"), "set_ray_pickable", "is_ray_pickable");
 }
 
-String SoftBody3D::get_configuration_warning() const {
-	String warning = MeshInstance3D::get_configuration_warning();
+TypedArray<String> SoftBody3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (get_mesh().is_null()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-
-		warning += TTR("This body will be ignored until you set a mesh.");
+		warnings.push_back(TTR("This body will be ignored until you set a mesh."));
 	}
 
 	Transform t = get_transform();
 	if ((ABS(t.basis.get_axis(0).length() - 1.0) > 0.05 || ABS(t.basis.get_axis(1).length() - 1.0) > 0.05 || ABS(t.basis.get_axis(2).length() - 1.0) > 0.05)) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-
-		warning += TTR("Size changes to SoftBody3D will be overridden by the physics engine when running.\nChange the size in children collision shapes instead.");
+		warnings.push_back(TTR("Size changes to SoftBody3D will be overridden by the physics engine when running.\nChange the size in children collision shapes instead."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void SoftBody3D::_update_physics_server() {
diff --git a/scene/3d/soft_body_3d.h b/scene/3d/soft_body_3d.h
index f98df39209..0d0d39d48f 100644
--- a/scene/3d/soft_body_3d.h
+++ b/scene/3d/soft_body_3d.h
@@ -113,7 +113,7 @@ protected:
 	void _notification(int p_what);
 	static void _bind_methods();
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 protected:
 	void _update_physics_server();
diff --git a/scene/3d/sprite_3d.cpp b/scene/3d/sprite_3d.cpp
index 0be54e7243..33b8b488c6 100644
--- a/scene/3d/sprite_3d.cpp
+++ b/scene/3d/sprite_3d.cpp
@@ -928,7 +928,7 @@ void AnimatedSprite3D::set_sprite_frames(const Ref<SpriteFrames> &p_frames) {
 	notify_property_list_changed();
 	_reset_timeout();
 	_queue_update();
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<SpriteFrames> AnimatedSprite3D::get_sprite_frames() const {
@@ -1058,17 +1058,14 @@ StringName AnimatedSprite3D::get_animation() const {
 	return animation;
 }
 
-String AnimatedSprite3D::get_configuration_warning() const {
-	String warning = SpriteBase3D::get_configuration_warning();
+TypedArray<String> AnimatedSprite3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (frames.is_null()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("A SpriteFrames resource must be created or set in the \"Frames\" property in order for AnimatedSprite3D to display frames.");
+		warnings.push_back(TTR("A SpriteFrames resource must be created or set in the \"Frames\" property in order for AnimatedSprite3D to display frames."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void AnimatedSprite3D::_bind_methods() {
diff --git a/scene/3d/sprite_3d.h b/scene/3d/sprite_3d.h
index d1bc8dc737..5e47e66bcb 100644
--- a/scene/3d/sprite_3d.h
+++ b/scene/3d/sprite_3d.h
@@ -236,7 +236,7 @@ public:
 
 	virtual Rect2 get_item_rect() const override;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 	AnimatedSprite3D();
 };
 
diff --git a/scene/3d/vehicle_body_3d.cpp b/scene/3d/vehicle_body_3d.cpp
index 5b0b3b89d3..edd58347f0 100644
--- a/scene/3d/vehicle_body_3d.cpp
+++ b/scene/3d/vehicle_body_3d.cpp
@@ -102,17 +102,14 @@ void VehicleWheel3D::_notification(int p_what) {
 	}
 }
 
-String VehicleWheel3D::get_configuration_warning() const {
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> VehicleWheel3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!Object::cast_to<VehicleBody3D>(get_parent())) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("VehicleWheel3D serves to provide a wheel system to a VehicleBody3D. Please use it as a child of a VehicleBody3D.");
+		warnings.push_back(TTR("VehicleWheel3D serves to provide a wheel system to a VehicleBody3D. Please use it as a child of a VehicleBody3D."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void VehicleWheel3D::_update(PhysicsDirectBodyState3D *s) {
diff --git a/scene/3d/vehicle_body_3d.h b/scene/3d/vehicle_body_3d.h
index 860fa7e3b7..646071a363 100644
--- a/scene/3d/vehicle_body_3d.h
+++ b/scene/3d/vehicle_body_3d.h
@@ -145,7 +145,7 @@ public:
 	void set_steering(real_t p_steering);
 	real_t get_steering() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	VehicleWheel3D();
 };
diff --git a/scene/3d/world_environment.cpp b/scene/3d/world_environment.cpp
index 214ffd6bd5..829ecc5ec2 100644
--- a/scene/3d/world_environment.cpp
+++ b/scene/3d/world_environment.cpp
@@ -65,7 +65,7 @@ void WorldEnvironment::_update_current_environment() {
 	} else {
 		get_viewport()->find_world_3d()->set_environment(Ref<Environment>());
 	}
-	get_tree()->call_group("_world_environment_" + itos(get_viewport()->find_world_3d()->get_scenario().get_id()), "update_configuration_warning");
+	get_tree()->call_group("_world_environment_" + itos(get_viewport()->find_world_3d()->get_scenario().get_id()), "update_configuration_warnings");
 }
 
 void WorldEnvironment::_update_current_camera_effects() {
@@ -76,7 +76,7 @@ void WorldEnvironment::_update_current_camera_effects() {
 		get_viewport()->find_world_3d()->set_camera_effects(Ref<CameraEffects>());
 	}
 
-	get_tree()->call_group("_world_camera_effects_" + itos(get_viewport()->find_world_3d()->get_scenario().get_id()), "update_configuration_warning");
+	get_tree()->call_group("_world_camera_effects_" + itos(get_viewport()->find_world_3d()->get_scenario().get_id()), "update_configuration_warnings");
 }
 
 void WorldEnvironment::set_environment(const Ref<Environment> &p_environment) {
@@ -96,7 +96,7 @@ void WorldEnvironment::set_environment(const Ref<Environment> &p_environment) {
 	if (is_inside_tree()) {
 		_update_current_environment();
 	} else {
-		update_configuration_warning();
+		update_configuration_warnings();
 	}
 }
 
@@ -121,7 +121,7 @@ void WorldEnvironment::set_camera_effects(const Ref<CameraEffects> &p_camera_eff
 	if (is_inside_tree()) {
 		_update_current_camera_effects();
 	} else {
-		update_configuration_warning();
+		update_configuration_warnings();
 	}
 }
 
@@ -129,35 +129,26 @@ Ref<CameraEffects> WorldEnvironment::get_camera_effects() const {
 	return camera_effects;
 }
 
-String WorldEnvironment::get_configuration_warning() const {
-	String warning = Node::get_configuration_warning();
+TypedArray<String> WorldEnvironment::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!environment.is_valid()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("WorldEnvironment requires its \"Environment\" property to contain an Environment to have a visible effect.");
+		warnings.push_back(TTR("WorldEnvironment requires its \"Environment\" property to contain an Environment to have a visible effect."));
 	}
 
 	if (!is_inside_tree()) {
-		return warning;
+		return warnings;
 	}
 
 	if (environment.is_valid() && get_viewport()->find_world_3d()->get_environment() != environment) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Only the first Environment has an effect in a scene (or set of instantiated scenes).");
+		warnings.push_back(("Only the first Environment has an effect in a scene (or set of instantiated scenes)."));
 	}
 
 	if (camera_effects.is_valid() && get_viewport()->find_world_3d()->get_camera_effects() != camera_effects) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Only the first CameraEffects has an effect in a scene (or set of instantiated scenes).");
+		warnings.push_back(TTR("Only one WorldEnvironment is allowed per scene (or set of instanced scenes)."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void WorldEnvironment::_bind_methods() {
diff --git a/scene/3d/world_environment.h b/scene/3d/world_environment.h
index e3f28d6d6b..9e85982381 100644
--- a/scene/3d/world_environment.h
+++ b/scene/3d/world_environment.h
@@ -55,7 +55,7 @@ public:
 	void set_camera_effects(const Ref<CameraEffects> &p_camera_effects);
 	Ref<CameraEffects> get_camera_effects() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	WorldEnvironment();
 };
diff --git a/scene/3d/xr_nodes.cpp b/scene/3d/xr_nodes.cpp
index 63be4352d5..b5037f9be7 100644
--- a/scene/3d/xr_nodes.cpp
+++ b/scene/3d/xr_nodes.cpp
@@ -55,23 +55,18 @@ void XRCamera3D::_notification(int p_what) {
 	};
 };
 
-String XRCamera3D::get_configuration_warning() const {
-	if (!is_visible() || !is_inside_tree()) {
-		return String();
+TypedArray<String> XRCamera3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
+
+	if (is_visible() && is_inside_tree()) {
+		// must be child node of XROrigin3D!
+		XROrigin3D *origin = Object::cast_to<XROrigin3D>(get_parent());
+		if (origin == nullptr) {
+			warnings.push_back(TTR("XRCamera3D must have an XROrigin3D node as its parent."));
+		};
 	}
 
-	String warning = Camera3D::get_configuration_warning();
-
-	// must be child node of XROrigin3D!
-	XROrigin3D *origin = Object::cast_to<XROrigin3D>(get_parent());
-	if (origin == nullptr) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("XRCamera3D must have an XROrigin3D node as its parent.");
-	};
-
-	return warning;
+	return warnings;
 };
 
 Vector3 XRCamera3D::project_local_ray_normal(const Point2 &p_pos) const {
@@ -265,7 +260,7 @@ void XRController3D::set_controller_id(int p_controller_id) {
 	// We don't check any bounds here, this controller may not yet be active and just be a place holder until it is.
 	// Note that setting this to 0 means this node is not bound to a controller yet.
 	controller_id = p_controller_id;
-	update_configuration_warning();
+	update_configuration_warnings();
 };
 
 int XRController3D::get_controller_id() const {
@@ -362,30 +357,22 @@ XRPositionalTracker::TrackerHand XRController3D::get_tracker_hand() const {
 	return tracker->get_tracker_hand();
 };
 
-String XRController3D::get_configuration_warning() const {
-	if (!is_visible() || !is_inside_tree()) {
-		return String();
-	}
-
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> XRController3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
-	// must be child node of XROrigin!
-	XROrigin3D *origin = Object::cast_to<XROrigin3D>(get_parent());
-	if (origin == nullptr) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
+	if (is_visible() && is_inside_tree()) {
+		// must be child node of XROrigin!
+		XROrigin3D *origin = Object::cast_to<XROrigin3D>(get_parent());
+		if (origin == nullptr) {
+			warnings.push_back(TTR("XRController3D must have an XROrigin3D node as its parent."));
 		}
-		warning += TTR("XRController3D must have an XROrigin3D node as its parent.");
-	};
 
-	if (controller_id == 0) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
+		if (controller_id == 0) {
+			warnings.push_back(TTR("The controller ID must not be 0 or this controller won't be bound to an actual controller."));
 		}
-		warning += TTR("The controller ID must not be 0 or this controller won't be bound to an actual controller.");
-	};
+	}
 
-	return warning;
+	return warnings;
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -459,7 +446,7 @@ void XRAnchor3D::set_anchor_id(int p_anchor_id) {
 	// We don't check any bounds here, this anchor may not yet be active and just be a place holder until it is.
 	// Note that setting this to 0 means this node is not bound to an anchor yet.
 	anchor_id = p_anchor_id;
-	update_configuration_warning();
+	update_configuration_warnings();
 };
 
 int XRAnchor3D::get_anchor_id() const {
@@ -487,30 +474,22 @@ bool XRAnchor3D::get_is_active() const {
 	return is_active;
 };
 
-String XRAnchor3D::get_configuration_warning() const {
-	if (!is_visible() || !is_inside_tree()) {
-		return String();
-	}
-
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> XRAnchor3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
-	// must be child node of XROrigin3D!
-	XROrigin3D *origin = Object::cast_to<XROrigin3D>(get_parent());
-	if (origin == nullptr) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
+	if (is_visible() && is_inside_tree()) {
+		// must be child node of XROrigin3D!
+		XROrigin3D *origin = Object::cast_to<XROrigin3D>(get_parent());
+		if (origin == nullptr) {
+			warnings.push_back(TTR("XRAnchor3D must have an XROrigin3D node as its parent."));
 		}
-		warning += TTR("XRAnchor3D must have an XROrigin3D node as its parent.");
-	};
 
-	if (anchor_id == 0) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
+		if (anchor_id == 0) {
+			warnings.push_back(TTR("The anchor ID must not be 0 or this anchor won't be bound to an actual anchor."));
 		}
-		warning += TTR("The anchor ID must not be 0 or this anchor won't be bound to an actual anchor.");
-	};
+	}
 
-	return warning;
+	return warnings;
 };
 
 Plane XRAnchor3D::get_plane() const {
@@ -528,21 +507,16 @@ Ref<Mesh> XRAnchor3D::get_mesh() const {
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
-String XROrigin3D::get_configuration_warning() const {
-	if (!is_visible() || !is_inside_tree()) {
-		return String();
-	}
-
-	String warning = Node3D::get_configuration_warning();
+TypedArray<String> XROrigin3D::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
-	if (tracked_camera == nullptr) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
+	if (is_visible() && is_inside_tree()) {
+		if (tracked_camera == nullptr) {
+			warnings.push_back(TTR("XROrigin3D requires an XRCamera3D child node."));
 		}
-		warning += TTR("XROrigin3D requires an XRCamera3D child node.");
 	}
 
-	return warning;
+	return warnings;
 };
 
 void XROrigin3D::_bind_methods() {
diff --git a/scene/3d/xr_nodes.h b/scene/3d/xr_nodes.h
index 7cd6e2ac57..90079f5fe9 100644
--- a/scene/3d/xr_nodes.h
+++ b/scene/3d/xr_nodes.h
@@ -50,7 +50,7 @@ protected:
 	void _notification(int p_what);
 
 public:
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	virtual Vector3 project_local_ray_normal(const Point2 &p_pos) const override;
 	virtual Point2 unproject_position(const Vector3 &p_pos) const override;
@@ -97,7 +97,7 @@ public:
 
 	Ref<Mesh> get_mesh() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	XRController3D() {}
 	~XRController3D() {}
@@ -133,7 +133,7 @@ public:
 
 	Ref<Mesh> get_mesh() const;
 
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	XRAnchor3D() {}
 	~XRAnchor3D() {}
@@ -158,7 +158,7 @@ protected:
 	static void _bind_methods();
 
 public:
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	void set_tracked_camera(XRCamera3D *p_tracked_camera);
 	void clear_tracked_camera_if(XRCamera3D *p_tracked_camera);
diff --git a/scene/animation/animation_tree.cpp b/scene/animation/animation_tree.cpp
index 4b4d3943c9..44f2d38a84 100644
--- a/scene/animation/animation_tree.cpp
+++ b/scene/animation/animation_tree.cpp
@@ -458,7 +458,7 @@ void AnimationTree::set_tree_root(const Ref<AnimationNode> &p_root) {
 
 	properties_dirty = true;
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Ref<AnimationNode> AnimationTree::get_tree_root() const {
@@ -1262,7 +1262,7 @@ void AnimationTree::_notification(int p_what) {
 
 void AnimationTree::set_animation_player(const NodePath &p_player) {
 	animation_player = p_player;
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 NodePath AnimationTree::get_animation_player() const {
@@ -1281,38 +1281,26 @@ uint64_t AnimationTree::get_last_process_pass() const {
 	return process_pass;
 }
 
-String AnimationTree::get_configuration_warning() const {
-	String warning = Node::get_configuration_warning();
+TypedArray<String> AnimationTree::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!root.is_valid()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("No root AnimationNode for the graph is set.");
+		warnings.push_back(TTR("No root AnimationNode for the graph is set."));
 	}
 
 	if (!has_node(animation_player)) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Path to an AnimationPlayer node containing animations is not set.");
+		warnings.push_back(TTR("Path to an AnimationPlayer node containing animations is not set."));
 	} else {
 		AnimationPlayer *player = Object::cast_to<AnimationPlayer>(get_node(animation_player));
 
 		if (!player) {
-			if (!warning.is_empty()) {
-				warning += "\n\n";
-			}
-			warning += TTR("Path set for AnimationPlayer does not lead to an AnimationPlayer node.");
+			warnings.push_back(TTR("Path set for AnimationPlayer does not lead to an AnimationPlayer node."));
 		} else if (!player->has_node(player->get_root())) {
-			if (!warning.is_empty()) {
-				warning += "\n\n";
-			}
-			warning += TTR("The AnimationPlayer root node is not a valid node.");
+			warnings.push_back(TTR("The AnimationPlayer root node is not a valid node."));
 		}
 	}
 
-	return warning;
+	return warnings;
 }
 
 void AnimationTree::set_root_motion_track(const NodePath &p_track) {
diff --git a/scene/animation/animation_tree.h b/scene/animation/animation_tree.h
index 1c5aec26ab..700ff1cb5b 100644
--- a/scene/animation/animation_tree.h
+++ b/scene/animation/animation_tree.h
@@ -300,7 +300,7 @@ public:
 	void set_animation_player(const NodePath &p_player);
 	NodePath get_animation_player() const;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	bool is_state_invalid() const;
 	String get_invalid_state_reason() const;
diff --git a/scene/gui/color_picker.cpp b/scene/gui/color_picker.cpp
index 78524a856a..b78f9cad24 100644
--- a/scene/gui/color_picker.cpp
+++ b/scene/gui/color_picker.cpp
@@ -143,6 +143,30 @@ void ColorPicker::_update_controls() {
 		scroll[3]->hide();
 		labels[3]->hide();
 	}
+
+	switch (picker_type) {
+		case SHAPE_HSV_RECTANGLE:
+			wheel_edit->hide();
+			w_edit->show();
+			uv_edit->show();
+			break;
+		case SHAPE_HSV_WHEEL:
+			wheel_edit->show();
+			w_edit->hide();
+			uv_edit->hide();
+
+			wheel->set_material(wheel_mat);
+			break;
+		case SHAPE_VHS_CIRCLE:
+			wheel_edit->show();
+			w_edit->show();
+			uv_edit->hide();
+
+			wheel->set_material(circle_mat);
+			break;
+		default: {
+		}
+	}
 }
 
 void ColorPicker::_set_pick_color(const Color &p_color, bool p_update_sliders) {
@@ -267,6 +291,8 @@ void ColorPicker::_update_color(bool p_update_sliders) {
 	for (int i = 0; i < 4; i++) {
 		scroll[i]->update();
 	}
+	wheel->update();
+	wheel_uv->update();
 	updating = false;
 }
 
@@ -309,6 +335,18 @@ Color ColorPicker::get_pick_color() const {
 	return color;
 }
 
+void ColorPicker::set_picker_shape(PickerShapeType p_picker_type) {
+	ERR_FAIL_INDEX(p_picker_type, SHAPE_MAX);
+	picker_type = p_picker_type;
+
+	_update_controls();
+	_update_color();
+}
+
+ColorPicker::PickerShapeType ColorPicker::get_picker_shape() const {
+	return picker_type;
+}
+
 void ColorPicker::add_preset(const Color &p_color) {
 	if (presets.find(p_color)) {
 		presets.move_to_back(presets.find(p_color));
@@ -421,7 +459,7 @@ void ColorPicker::_update_text_value() {
 }
 
 void ColorPicker::_sample_draw() {
-	const Rect2 r = Rect2(Point2(), Size2(uv_edit->get_size().width, sample->get_size().height * 0.95));
+	const Rect2 r = Rect2(Point2(), Size2(sample->get_size().width, sample->get_size().height * 0.95));
 
 	if (color.a < 1.0) {
 		sample->draw_texture_rect(get_theme_icon("preset_bg", "ColorPicker"), r, true);
@@ -441,42 +479,131 @@ void ColorPicker::_hsv_draw(int p_which, Control *c) {
 	}
 	if (p_which == 0) {
 		Vector<Point2> points;
-		points.push_back(Vector2());
-		points.push_back(Vector2(c->get_size().x, 0));
-		points.push_back(c->get_size());
-		points.push_back(Vector2(0, c->get_size().y));
 		Vector<Color> colors;
-		colors.push_back(Color(1, 1, 1, 1));
-		colors.push_back(Color(1, 1, 1, 1));
-		colors.push_back(Color(0, 0, 0, 1));
-		colors.push_back(Color(0, 0, 0, 1));
-		c->draw_polygon(points, colors);
 		Vector<Color> colors2;
 		Color col = color;
+		Vector2 center = c->get_size() / 2.0;
+
+		switch (picker_type) {
+			case SHAPE_HSV_WHEEL: {
+				points.resize(4);
+				colors.resize(4);
+				colors2.resize(4);
+				real_t ring_radius_x = Math_SQRT12 * c->get_size().width * 0.42;
+				real_t ring_radius_y = Math_SQRT12 * c->get_size().height * 0.42;
+
+				points.set(0, center - Vector2(ring_radius_x, ring_radius_y));
+				points.set(1, center + Vector2(ring_radius_x, -ring_radius_y));
+				points.set(2, center + Vector2(ring_radius_x, ring_radius_y));
+				points.set(3, center + Vector2(-ring_radius_x, ring_radius_y));
+				colors.set(0, Color(1, 1, 1, 1));
+				colors.set(1, Color(1, 1, 1, 1));
+				colors.set(2, Color(0, 0, 0, 1));
+				colors.set(3, Color(0, 0, 0, 1));
+				c->draw_polygon(points, colors);
+
+				col.set_hsv(h, 1, 1);
+				col.a = 0;
+				colors2.set(0, col);
+				col.a = 1;
+				colors2.set(1, col);
+				col.set_hsv(h, 1, 0);
+				colors2.set(2, col);
+				col.a = 0;
+				colors2.set(3, col);
+				c->draw_polygon(points, colors2);
+				break;
+			}
+			case SHAPE_HSV_RECTANGLE: {
+				points.resize(4);
+				colors.resize(4);
+				colors2.resize(4);
+				points.set(0, Vector2());
+				points.set(1, Vector2(c->get_size().x, 0));
+				points.set(2, c->get_size());
+				points.set(3, Vector2(0, c->get_size().y));
+				colors.set(0, Color(1, 1, 1, 1));
+				colors.set(1, Color(1, 1, 1, 1));
+				colors.set(2, Color(0, 0, 0, 1));
+				colors.set(3, Color(0, 0, 0, 1));
+				c->draw_polygon(points, colors);
+				col = color;
+				col.set_hsv(h, 1, 1);
+				col.a = 0;
+				colors2.set(0, col);
+				col.a = 1;
+				colors2.set(1, col);
+				col.set_hsv(h, 1, 0);
+				colors2.set(2, col);
+				col.a = 0;
+				colors2.set(3, col);
+				c->draw_polygon(points, colors2);
+				break;
+			}
+			default: {
+			}
+		}
+		Ref<Texture2D> cursor = get_theme_icon("picker_cursor", "ColorPicker");
+		int x;
+		int y;
+		if (picker_type == SHAPE_VHS_CIRCLE) {
+			x = center.x + (center.x * Math::cos(h * Math_TAU) * s) - (cursor->get_width() / 2);
+			y = center.y + (center.y * Math::sin(h * Math_TAU) * s) - (cursor->get_height() / 2);
+		} else {
+			real_t corner_x = (c == wheel_uv) ? center.x - Math_SQRT12 * c->get_size().width * 0.42 : 0;
+			real_t corner_y = (c == wheel_uv) ? center.y - Math_SQRT12 * c->get_size().height * 0.42 : 0;
+
+			Size2 real_size(c->get_size().x - corner_x * 2, c->get_size().y - corner_y * 2);
+			x = CLAMP(real_size.x * s, 0, real_size.x) + corner_x - (cursor->get_width() / 2);
+			y = CLAMP(real_size.y - real_size.y * v, 0, real_size.y) + corner_y - (cursor->get_height() / 2);
+		}
+		c->draw_texture(cursor, Point2(x, y));
+
 		col.set_hsv(h, 1, 1);
-		col.a = 0;
-		colors2.push_back(col);
-		col.a = 1;
-		colors2.push_back(col);
-		col.set_hsv(h, 1, 0);
-		colors2.push_back(col);
-		col.a = 0;
-		colors2.push_back(col);
-		c->draw_polygon(points, colors2);
-		int x = CLAMP(c->get_size().x * s, 0, c->get_size().x);
-		int y = CLAMP(c->get_size().y - c->get_size().y * v, 0, c->get_size().y);
-		col = color;
-		col.a = 1;
-		c->draw_line(Point2(x, 0), Point2(x, c->get_size().y), col.inverted());
-		c->draw_line(Point2(0, y), Point2(c->get_size().x, y), col.inverted());
-		c->draw_line(Point2(x, y), Point2(x, y), Color(1, 1, 1), 2);
+		if (picker_type == SHAPE_HSV_WHEEL) {
+			points.resize(4);
+			double h1 = h - (0.5 / 360);
+			double h2 = h + (0.5 / 360);
+			points.set(0, Point2(center.x + (center.x * Math::cos(h1 * Math_TAU)), center.y + (center.y * Math::sin(h1 * Math_TAU))));
+			points.set(1, Point2(center.x + (center.x * Math::cos(h1 * Math_TAU) * 0.84), center.y + (center.y * Math::sin(h1 * Math_TAU) * 0.84)));
+			points.set(2, Point2(center.x + (center.x * Math::cos(h2 * Math_TAU)), center.y + (center.y * Math::sin(h2 * Math_TAU))));
+			points.set(3, Point2(center.x + (center.x * Math::cos(h2 * Math_TAU) * 0.84), center.y + (center.y * Math::sin(h2 * Math_TAU) * 0.84)));
+			c->draw_multiline(points, col.inverted());
+		}
+
 	} else if (p_which == 1) {
-		Ref<Texture2D> hue = get_theme_icon("color_hue", "ColorPicker");
-		c->draw_texture_rect(hue, Rect2(Point2(), c->get_size()));
-		int y = c->get_size().y - c->get_size().y * (1.0 - h);
-		Color col = Color();
-		col.set_hsv(h, 1, 1);
-		c->draw_line(Point2(0, y), Point2(c->get_size().x, y), col.inverted());
+		if (picker_type == SHAPE_HSV_RECTANGLE) {
+			Ref<Texture2D> hue = get_theme_icon("color_hue", "ColorPicker");
+			c->draw_texture_rect(hue, Rect2(Point2(), c->get_size()));
+			int y = c->get_size().y - c->get_size().y * (1.0 - h);
+			Color col;
+			col.set_hsv(h, 1, 1);
+			c->draw_line(Point2(0, y), Point2(c->get_size().x, y), col.inverted());
+		} else if (picker_type == SHAPE_VHS_CIRCLE) {
+			Vector<Point2> points;
+			Vector<Color> colors;
+			Color col;
+			col.set_hsv(h, s, 1);
+			points.resize(4);
+			colors.resize(4);
+			points.set(0, Vector2());
+			points.set(1, Vector2(c->get_size().x, 0));
+			points.set(2, c->get_size());
+			points.set(3, Vector2(0, c->get_size().y));
+			colors.set(0, col);
+			colors.set(1, col);
+			colors.set(2, Color(0, 0, 0));
+			colors.set(3, Color(0, 0, 0));
+			c->draw_polygon(points, colors);
+			int y = c->get_size().y - c->get_size().y * CLAMP(v, 0, 1);
+			col.set_hsv(h, 1, v);
+			c->draw_line(Point2(0, y), Point2(c->get_size().x, y), col.inverted());
+		}
+	} else if (p_which == 2) {
+		c->draw_rect(Rect2(Point2(), c->get_size()), Color(1, 1, 1));
+		if (picker_type == SHAPE_VHS_CIRCLE) {
+			circle_mat->set_shader_param("v", v);
+		}
 	}
 }
 
@@ -543,16 +670,51 @@ void ColorPicker::_slider_draw(int p_which) {
 	scroll[p_which]->draw_polygon(pos, col);
 }
 
-void ColorPicker::_uv_input(const Ref<InputEvent> &p_event) {
+void ColorPicker::_uv_input(const Ref<InputEvent> &p_event, Control *c) {
 	Ref<InputEventMouseButton> bev = p_event;
 
 	if (bev.is_valid()) {
 		if (bev->is_pressed() && bev->get_button_index() == MOUSE_BUTTON_LEFT) {
+			Vector2 center = c->get_size() / 2.0;
+			if (picker_type == SHAPE_VHS_CIRCLE) {
+				real_t dist = center.distance_to(bev->get_position());
+
+				if (dist <= center.x) {
+					real_t rad = Math::atan2(bev->get_position().y - center.y, bev->get_position().x - center.x);
+					h = ((rad >= 0) ? rad : (Math_TAU + rad)) / Math_TAU;
+					s = CLAMP(dist / center.x, 0, 1);
+				} else {
+					return;
+				}
+			} else {
+				real_t corner_x = (c == wheel_uv) ? center.x - Math_SQRT12 * c->get_size().width * 0.42 : 0;
+				real_t corner_y = (c == wheel_uv) ? center.y - Math_SQRT12 * c->get_size().height * 0.42 : 0;
+				Size2 real_size(c->get_size().x - corner_x * 2, c->get_size().y - corner_y * 2);
+
+				if (bev->get_position().x < corner_x || bev->get_position().x > c->get_size().x - corner_x ||
+						bev->get_position().y < corner_y || bev->get_position().y > c->get_size().y - corner_y) {
+					{
+						real_t dist = center.distance_to(bev->get_position());
+
+						if (dist >= center.x * 0.84 && dist <= center.x) {
+							real_t rad = Math::atan2(bev->get_position().y - center.y, bev->get_position().x - center.x);
+							h = ((rad >= 0) ? rad : (Math_TAU + rad)) / Math_TAU;
+							spinning = true;
+						} else {
+							return;
+						}
+					}
+				}
+
+				if (!spinning) {
+					real_t x = CLAMP(bev->get_position().x, corner_x, c->get_size().x - corner_x);
+					real_t y = CLAMP(bev->get_position().y, corner_x, c->get_size().y - corner_y);
+
+					s = (x - c->get_position().x - corner_x) / real_size.x;
+					v = 1.0 - (y - c->get_position().y - corner_y) / real_size.y;
+				}
+			}
 			changing_color = true;
-			float x = CLAMP((float)bev->get_position().x, 0, uv_edit->get_size().width);
-			float y = CLAMP((float)bev->get_position().y, 0, uv_edit->get_size().height);
-			s = x / uv_edit->get_size().width;
-			v = 1.0 - y / uv_edit->get_size().height;
 			color.set_hsv(h, s, v, color.a);
 			last_hsv = color;
 			set_pick_color(color);
@@ -563,8 +725,10 @@ void ColorPicker::_uv_input(const Ref<InputEvent> &p_event) {
 		} else if (deferred_mode_enabled && !bev->is_pressed() && bev->get_button_index() == MOUSE_BUTTON_LEFT) {
 			emit_signal("color_changed", color);
 			changing_color = false;
+			spinning = false;
 		} else {
 			changing_color = false;
+			spinning = false;
 		}
 	}
 
@@ -574,10 +738,30 @@ void ColorPicker::_uv_input(const Ref<InputEvent> &p_event) {
 		if (!changing_color) {
 			return;
 		}
-		float x = CLAMP((float)mev->get_position().x, 0, uv_edit->get_size().width);
-		float y = CLAMP((float)mev->get_position().y, 0, uv_edit->get_size().height);
-		s = x / uv_edit->get_size().width;
-		v = 1.0 - y / uv_edit->get_size().height;
+
+		Vector2 center = c->get_size() / 2.0;
+		if (picker_type == SHAPE_VHS_CIRCLE) {
+			real_t dist = center.distance_to(mev->get_position());
+			real_t rad = Math::atan2(mev->get_position().y - center.y, mev->get_position().x - center.x);
+			h = ((rad >= 0) ? rad : (Math_TAU + rad)) / Math_TAU;
+			s = CLAMP(dist / center.x, 0, 1);
+		} else {
+			if (spinning) {
+				real_t rad = Math::atan2(mev->get_position().y - center.y, mev->get_position().x - center.x);
+				h = ((rad >= 0) ? rad : (Math_TAU + rad)) / Math_TAU;
+			} else {
+				real_t corner_x = (c == wheel_uv) ? center.x - Math_SQRT12 * c->get_size().width * 0.42 : 0;
+				real_t corner_y = (c == wheel_uv) ? center.y - Math_SQRT12 * c->get_size().height * 0.42 : 0;
+				Size2 real_size(c->get_size().x - corner_x * 2, c->get_size().y - corner_y * 2);
+
+				real_t x = CLAMP(mev->get_position().x, corner_x, c->get_size().x - corner_x);
+				real_t y = CLAMP(mev->get_position().y, corner_x, c->get_size().y - corner_y);
+
+				s = (x - corner_x) / real_size.x;
+				v = 1.0 - (y - corner_y) / real_size.y;
+			}
+		}
+
 		color.set_hsv(h, s, v, color.a);
 		last_hsv = color;
 		set_pick_color(color);
@@ -595,7 +779,11 @@ void ColorPicker::_w_input(const Ref<InputEvent> &p_event) {
 		if (bev->is_pressed() && bev->get_button_index() == MOUSE_BUTTON_LEFT) {
 			changing_color = true;
 			float y = CLAMP((float)bev->get_position().y, 0, w_edit->get_size().height);
-			h = y / w_edit->get_size().height;
+			if (picker_type == SHAPE_VHS_CIRCLE) {
+				v = 1.0 - (y / w_edit->get_size().height);
+			} else {
+				h = y / w_edit->get_size().height;
+			}
 		} else {
 			changing_color = false;
 		}
@@ -617,7 +805,11 @@ void ColorPicker::_w_input(const Ref<InputEvent> &p_event) {
 			return;
 		}
 		float y = CLAMP((float)mev->get_position().y, 0, w_edit->get_size().height);
-		h = y / w_edit->get_size().height;
+		if (picker_type == SHAPE_VHS_CIRCLE) {
+			v = 1.0 - (y / w_edit->get_size().height);
+		} else {
+			h = y / w_edit->get_size().height;
+		}
 		color.set_hsv(h, s, v, color.a);
 		last_hsv = color;
 		set_pick_color(color);
@@ -801,18 +993,25 @@ void ColorPicker::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("add_preset", "color"), &ColorPicker::add_preset);
 	ClassDB::bind_method(D_METHOD("erase_preset", "color"), &ColorPicker::erase_preset);
 	ClassDB::bind_method(D_METHOD("get_presets"), &ColorPicker::get_presets);
+	ClassDB::bind_method(D_METHOD("set_picker_shape", "picker"), &ColorPicker::set_picker_shape);
+	ClassDB::bind_method(D_METHOD("get_picker_shape"), &ColorPicker::get_picker_shape);
 
 	ADD_PROPERTY(PropertyInfo(Variant::COLOR, "color"), "set_pick_color", "get_pick_color");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "edit_alpha"), "set_edit_alpha", "is_editing_alpha");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "hsv_mode"), "set_hsv_mode", "is_hsv_mode");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "raw_mode"), "set_raw_mode", "is_raw_mode");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "deferred_mode"), "set_deferred_mode", "is_deferred_mode");
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "picker_shape", PROPERTY_HINT_ENUM, "HSV Rectangle,HSV Rectangle Wheel,VHS Circle"), "set_picker_shape", "get_picker_shape");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "presets_enabled"), "set_presets_enabled", "are_presets_enabled");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "presets_visible"), "set_presets_visible", "are_presets_visible");
 
 	ADD_SIGNAL(MethodInfo("color_changed", PropertyInfo(Variant::COLOR, "color")));
 	ADD_SIGNAL(MethodInfo("preset_added", PropertyInfo(Variant::COLOR, "color")));
 	ADD_SIGNAL(MethodInfo("preset_removed", PropertyInfo(Variant::COLOR, "color")));
+
+	BIND_ENUM_CONSTANT(SHAPE_HSV_RECTANGLE);
+	BIND_ENUM_CONSTANT(SHAPE_HSV_WHEEL);
+	BIND_ENUM_CONSTANT(SHAPE_VHS_CIRCLE);
 }
 
 ColorPicker::ColorPicker() :
@@ -821,32 +1020,21 @@ ColorPicker::ColorPicker() :
 	add_child(hb_edit);
 	hb_edit->set_v_size_flags(SIZE_EXPAND_FILL);
 
-	uv_edit = memnew(Control);
 	hb_edit->add_child(uv_edit);
-	uv_edit->connect("gui_input", callable_mp(this, &ColorPicker::_uv_input));
+	uv_edit->connect("gui_input", callable_mp(this, &ColorPicker::_uv_input), make_binds(uv_edit));
 	uv_edit->set_mouse_filter(MOUSE_FILTER_PASS);
 	uv_edit->set_h_size_flags(SIZE_EXPAND_FILL);
 	uv_edit->set_v_size_flags(SIZE_EXPAND_FILL);
 	uv_edit->set_custom_minimum_size(Size2(get_theme_constant("sv_width"), get_theme_constant("sv_height")));
 	uv_edit->connect("draw", callable_mp(this, &ColorPicker::_hsv_draw), make_binds(0, uv_edit));
 
-	w_edit = memnew(Control);
-	hb_edit->add_child(w_edit);
-	w_edit->set_custom_minimum_size(Size2(get_theme_constant("h_width"), 0));
-	w_edit->set_h_size_flags(SIZE_FILL);
-	w_edit->set_v_size_flags(SIZE_EXPAND_FILL);
-	w_edit->connect("gui_input", callable_mp(this, &ColorPicker::_w_input));
-	w_edit->connect("draw", callable_mp(this, &ColorPicker::_hsv_draw), make_binds(1, w_edit));
-
 	HBoxContainer *hb_smpl = memnew(HBoxContainer);
 	add_child(hb_smpl);
 
-	sample = memnew(TextureRect);
 	hb_smpl->add_child(sample);
 	sample->set_h_size_flags(SIZE_EXPAND_FILL);
 	sample->connect("draw", callable_mp(this, &ColorPicker::_sample_draw));
 
-	btn_pick = memnew(Button);
 	btn_pick->set_flat(true);
 	hb_smpl->add_child(btn_pick);
 	btn_pick->set_toggle_mode(true);
@@ -896,17 +1084,14 @@ ColorPicker::ColorPicker() :
 	HBoxContainer *hhb = memnew(HBoxContainer);
 	vbr->add_child(hhb);
 
-	btn_hsv = memnew(CheckButton);
 	hhb->add_child(btn_hsv);
 	btn_hsv->set_text(RTR("HSV"));
 	btn_hsv->connect("toggled", callable_mp(this, &ColorPicker::set_hsv_mode));
 
-	btn_raw = memnew(CheckButton);
 	hhb->add_child(btn_raw);
 	btn_raw->set_text(RTR("Raw"));
 	btn_raw->connect("toggled", callable_mp(this, &ColorPicker::set_raw_mode));
 
-	text_type = memnew(Button);
 	hhb->add_child(text_type);
 	text_type->set_text("#");
 	text_type->set_tooltip(TTR("Switch between hexadecimal and code values."));
@@ -920,34 +1105,68 @@ ColorPicker::ColorPicker() :
 		text_type->set_mouse_filter(MOUSE_FILTER_IGNORE);
 	}
 
-	c_text = memnew(LineEdit);
 	hhb->add_child(c_text);
 	c_text->set_h_size_flags(SIZE_EXPAND_FILL);
 	c_text->connect("text_entered", callable_mp(this, &ColorPicker::_html_entered));
 	c_text->connect("focus_entered", callable_mp(this, &ColorPicker::_focus_enter));
 	c_text->connect("focus_exited", callable_mp(this, &ColorPicker::_html_focus_exit));
 
+	wheel_edit->set_h_size_flags(SIZE_EXPAND_FILL);
+	wheel_edit->set_v_size_flags(SIZE_EXPAND_FILL);
+	wheel_edit->set_custom_minimum_size(Size2(get_theme_constant("sv_width"), get_theme_constant("sv_height")));
+	hb_edit->add_child(wheel_edit);
+
+	wheel_mat.instance();
+	circle_mat.instance();
+
+	Ref<Shader> wheel_shader(memnew(Shader));
+	wheel_shader->set_code("shader_type canvas_item;const float TAU=6.28318530718;void fragment(){float x=UV.x-0.5;float y=UV.y-0.5;float a=atan(y,x);x+=0.001;y+=0.001;float b=float(sqrt(x*x+y*y)<0.5)*float(sqrt(x*x+y*y)>0.42);x-=0.002;float b2=float(sqrt(x*x+y*y)<0.5)*float(sqrt(x*x+y*y)>0.42);y-=0.002;float b3=float(sqrt(x*x+y*y)<0.5)*float(sqrt(x*x+y*y)>0.42);x+=0.002;float b4=float(sqrt(x*x+y*y)<0.5)*float(sqrt(x*x+y*y)>0.42);COLOR=vec4(clamp((abs(fract(((a-TAU)/TAU)+vec3(3.0,2.0,1.0)/3.0)*6.0-3.0)-1.0),0.0,1.0),(b+b2+b3+b4)/4.00);}");
+	wheel_mat->set_shader(wheel_shader);
+
+	Ref<Shader> circle_shader(memnew(Shader));
+	circle_shader->set_code("shader_type canvas_item;const float TAU=6.28318530718;uniform float v=1.0;void fragment(){float x=UV.x-0.5;float y=UV.y-0.5;float a=atan(y,x);x+=0.001;y+=0.001;float b=float(sqrt(x*x+y*y)<0.5);x-=0.002;float b2=float(sqrt(x*x+y*y)<0.5);y-=0.002;float b3=float(sqrt(x*x+y*y)<0.5);x+=0.002;float b4=float(sqrt(x*x+y*y)<0.5);COLOR=vec4(mix(vec3(1.0),clamp(abs(fract(vec3((a-TAU)/TAU)+vec3(1.0,2.0/3.0,1.0/3.0))*6.0-vec3(3.0))-vec3(1.0),0.0,1.0),((float(sqrt(x*x+y*y))*2.0))/1.0)*vec3(v),(b+b2+b3+b4)/4.00);}");
+	circle_mat->set_shader(circle_shader);
+
+	MarginContainer *wheel_margin(memnew(MarginContainer));
+#ifdef TOOLS_ENABLED
+	wheel_margin->add_theme_constant_override("margin_bottom", 8 * EDSCALE);
+#else
+	wheel_margin->add_theme_constant_override("margin_bottom", 8);
+#endif
+	wheel_edit->add_child(wheel_margin);
+
+	wheel_margin->add_child(wheel);
+	wheel->set_mouse_filter(MOUSE_FILTER_PASS);
+	wheel->connect("draw", callable_mp(this, &ColorPicker::_hsv_draw), make_binds(2, wheel));
+
+	wheel_margin->add_child(wheel_uv);
+	wheel_uv->connect("gui_input", callable_mp(this, &ColorPicker::_uv_input), make_binds(wheel_uv));
+	wheel_uv->connect("draw", callable_mp(this, &ColorPicker::_hsv_draw), make_binds(0, wheel_uv));
+
+	hb_edit->add_child(w_edit);
+	w_edit->set_custom_minimum_size(Size2(get_theme_constant("h_width"), 0));
+	w_edit->set_h_size_flags(SIZE_FILL);
+	w_edit->set_v_size_flags(SIZE_EXPAND_FILL);
+	w_edit->connect("gui_input", callable_mp(this, &ColorPicker::_w_input));
+	w_edit->connect("draw", callable_mp(this, &ColorPicker::_hsv_draw), make_binds(1, w_edit));
+
+	picker_type = SHAPE_HSV_RECTANGLE;
 	_update_controls();
 	updating = false;
 
 	set_pick_color(Color(1, 1, 1));
 
-	preset_separator = memnew(HSeparator);
 	add_child(preset_separator);
 
-	preset_container = memnew(HBoxContainer);
 	preset_container->set_h_size_flags(SIZE_EXPAND_FILL);
 	add_child(preset_container);
 
-	preset = memnew(TextureRect);
 	preset_container->add_child(preset);
 	preset->connect("gui_input", callable_mp(this, &ColorPicker::_preset_input));
 	preset->connect("draw", callable_mp(this, &ColorPicker::_update_presets));
 
-	preset_container2 = memnew(HBoxContainer);
 	preset_container2->set_h_size_flags(SIZE_EXPAND_FILL);
 	add_child(preset_container2);
-	bt_add_preset = memnew(Button);
 	preset_container2->add_child(bt_add_preset);
 	bt_add_preset->set_tooltip(RTR("Add current color as a preset."));
 	bt_add_preset->connect("pressed", callable_mp(this, &ColorPicker::_add_preset_pressed));
diff --git a/scene/gui/color_picker.h b/scene/gui/color_picker.h
index 24e1746c41..a0d2aa95ca 100644
--- a/scene/gui/color_picker.h
+++ b/scene/gui/color_picker.h
@@ -31,6 +31,7 @@
 #ifndef COLOR_PICKER_H
 #define COLOR_PICKER_H
 
+#include "scene/gui/aspect_ratio_container.h"
 #include "scene/gui/box_container.h"
 #include "scene/gui/button.h"
 #include "scene/gui/check_button.h"
@@ -45,29 +46,44 @@
 class ColorPicker : public BoxContainer {
 	GDCLASS(ColorPicker, BoxContainer);
 
+public:
+	enum PickerShapeType {
+		SHAPE_HSV_RECTANGLE,
+		SHAPE_HSV_WHEEL,
+		SHAPE_VHS_CIRCLE,
+
+		SHAPE_MAX
+	};
+
 private:
 	Control *screen = nullptr;
-	Control *uv_edit;
-	Control *w_edit;
-	TextureRect *sample;
-	TextureRect *preset;
-	HBoxContainer *preset_container;
-	HBoxContainer *preset_container2;
-	HSeparator *preset_separator;
-	Button *bt_add_preset;
+	Control *uv_edit = memnew(Control);
+	Control *w_edit = memnew(Control);
+	AspectRatioContainer *wheel_edit = memnew(AspectRatioContainer);
+	Ref<ShaderMaterial> wheel_mat;
+	Ref<ShaderMaterial> circle_mat;
+	Control *wheel = memnew(Control);
+	Control *wheel_uv = memnew(Control);
+	TextureRect *sample = memnew(TextureRect);
+	TextureRect *preset = memnew(TextureRect);
+	HBoxContainer *preset_container = memnew(HBoxContainer);
+	HBoxContainer *preset_container2 = memnew(HBoxContainer);
+	HSeparator *preset_separator = memnew(HSeparator);
+	Button *bt_add_preset = memnew(Button);
 	List<Color> presets;
-	Button *btn_pick;
-	CheckButton *btn_hsv;
-	CheckButton *btn_raw;
+	Button *btn_pick = memnew(Button);
+	CheckButton *btn_hsv = memnew(CheckButton);
+	CheckButton *btn_raw = memnew(CheckButton);
 	HSlider *scroll[4];
 	SpinBox *values[4];
 	Label *labels[4];
-	Button *text_type;
-	LineEdit *c_text;
+	Button *text_type = memnew(Button);
+	LineEdit *c_text = memnew(LineEdit);
 	bool edit_alpha = true;
 	Size2i ms;
 	bool text_is_constructor = false;
 	int presets_per_row = 0;
+	PickerShapeType picker_type = SHAPE_HSV_WHEEL;
 
 	Color color;
 	bool raw_mode_enabled = false;
@@ -75,6 +91,7 @@ private:
 	bool deferred_mode_enabled = false;
 	bool updating = true;
 	bool changing_color = false;
+	bool spinning = false;
 	bool presets_enabled = true;
 	bool presets_visible = true;
 	float h = 0.0;
@@ -93,7 +110,7 @@ private:
 	void _hsv_draw(int p_which, Control *c);
 	void _slider_draw(int p_which);
 
-	void _uv_input(const Ref<InputEvent> &p_event);
+	void _uv_input(const Ref<InputEvent> &p_event, Control *c);
 	void _w_input(const Ref<InputEvent> &p_event);
 	void _preset_input(const Ref<InputEvent> &p_event);
 	void _screen_input(const Ref<InputEvent> &p_event);
@@ -115,6 +132,9 @@ public:
 	void set_pick_color(const Color &p_color);
 	Color get_pick_color() const;
 
+	void set_picker_shape(PickerShapeType p_picker_type);
+	PickerShapeType get_picker_shape() const;
+
 	void add_preset(const Color &p_color);
 	void erase_preset(const Color &p_color);
 	PackedColorArray get_presets() const;
@@ -175,4 +195,5 @@ public:
 	ColorPickerButton();
 };
 
+VARIANT_ENUM_CAST(ColorPicker::PickerShapeType);
 #endif // COLOR_PICKER_H
diff --git a/scene/gui/container.cpp b/scene/gui/container.cpp
index 2e6b798eea..dea69aae6b 100644
--- a/scene/gui/container.cpp
+++ b/scene/gui/container.cpp
@@ -159,16 +159,14 @@ void Container::_notification(int p_what) {
 	}
 }
 
-String Container::get_configuration_warning() const {
-	String warning = Control::get_configuration_warning();
+TypedArray<String> Container::get_configuration_warnings() const {
+	TypedArray<String> warnings = Control::get_configuration_warnings();
 
 	if (get_class() == "Container" && get_script().is_null()) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Container by itself serves no purpose unless a script configures its children placement behavior.\nIf you don't intend to add a script, use a plain Control node instead.");
+		warnings.push_back(TTR("Container by itself serves no purpose unless a script configures its children placement behavior.\nIf you don't intend to add a script, use a plain Control node instead."));
 	}
-	return warning;
+
+	return warnings;
 }
 
 void Container::_bind_methods() {
diff --git a/scene/gui/container.h b/scene/gui/container.h
index a4f392a3ae..bce3085f0c 100644
--- a/scene/gui/container.h
+++ b/scene/gui/container.h
@@ -56,7 +56,7 @@ public:
 
 	void fit_child_in_rect(Control *p_child, const Rect2 &p_rect);
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	Container();
 };
diff --git a/scene/gui/control.cpp b/scene/gui/control.cpp
index 300201c0db..f569fbc420 100644
--- a/scene/gui/control.cpp
+++ b/scene/gui/control.cpp
@@ -2183,7 +2183,7 @@ Ref<Theme> Control::get_theme() const {
 
 void Control::set_tooltip(const String &p_tooltip) {
 	data.tooltip = p_tooltip;
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 String Control::get_tooltip(const Point2 &p_pos) const {
@@ -2468,7 +2468,7 @@ int Control::get_v_size_flags() const {
 void Control::set_mouse_filter(MouseFilter p_filter) {
 	ERR_FAIL_INDEX(p_filter, 3);
 	data.mouse_filter = p_filter;
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 Control::MouseFilter Control::get_mouse_filter() const {
@@ -2707,17 +2707,14 @@ void Control::get_argument_options(const StringName &p_function, int p_idx, List
 	}
 }
 
-String Control::get_configuration_warning() const {
-	String warning = CanvasItem::get_configuration_warning();
+TypedArray<String> Control::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (data.mouse_filter == MOUSE_FILTER_IGNORE && data.tooltip != "") {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("The Hint Tooltip won't be displayed as the control's Mouse Filter is set to \"Ignore\". To solve this, set the Mouse Filter to \"Stop\" or \"Pass\".");
+		warnings.push_back(TTR("The Hint Tooltip won't be displayed as the control's Mouse Filter is set to \"Ignore\". To solve this, set the Mouse Filter to \"Stop\" or \"Pass\"."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void Control::set_clip_contents(bool p_clip) {
diff --git a/scene/gui/control.h b/scene/gui/control.h
index 184b2df6d3..1f397df589 100644
--- a/scene/gui/control.h
+++ b/scene/gui/control.h
@@ -524,7 +524,7 @@ public:
 	bool is_visibility_clip_disabled() const;
 
 	virtual void get_argument_options(const StringName &p_function, int p_idx, List<String> *r_options) const override;
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	Control() {}
 };
diff --git a/scene/gui/range.cpp b/scene/gui/range.cpp
index 86b775e795..adc1ed67ca 100644
--- a/scene/gui/range.cpp
+++ b/scene/gui/range.cpp
@@ -30,17 +30,14 @@
 
 #include "range.h"
 
-String Range::get_configuration_warning() const {
-	String warning = Control::get_configuration_warning();
+TypedArray<String> Range::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (shared->exp_ratio && shared->min <= 0) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("If \"Exp Edit\" is enabled, \"Min Value\" must be greater than 0.");
+		warnings.push_back(TTR("If \"Exp Edit\" is enabled, \"Min Value\" must be greater than 0."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void Range::_value_changed_notify() {
@@ -106,7 +103,7 @@ void Range::set_min(double p_min) {
 
 	shared->emit_changed("min");
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 void Range::set_max(double p_max) {
@@ -181,7 +178,6 @@ double Range::get_as_ratio() const {
 		double v = Math::log(value) / Math::log((double)2);
 
 		return CLAMP((v - exp_min) / (exp_max - exp_min), 0, 1);
-
 	} else {
 		float value = CLAMP(get_value(), shared->min, shared->max);
 		return CLAMP((value - get_min()) / (get_max() - get_min()), 0, 1);
@@ -287,7 +283,7 @@ bool Range::is_using_rounded_values() const {
 void Range::set_exp_ratio(bool p_enable) {
 	shared->exp_ratio = p_enable;
 
-	update_configuration_warning();
+	update_configuration_warnings();
 }
 
 bool Range::is_ratio_exp() const {
diff --git a/scene/gui/range.h b/scene/gui/range.h
index 1072a109c6..7a129e88d6 100644
--- a/scene/gui/range.h
+++ b/scene/gui/range.h
@@ -97,7 +97,7 @@ public:
 	void share(Range *p_range);
 	void unshare();
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	Range();
 	~Range();
diff --git a/scene/gui/scroll_container.cpp b/scene/gui/scroll_container.cpp
index 757a0841ea..73c6371658 100644
--- a/scene/gui/scroll_container.cpp
+++ b/scene/gui/scroll_container.cpp
@@ -544,8 +544,8 @@ void ScrollContainer::set_follow_focus(bool p_follow) {
 	follow_focus = p_follow;
 }
 
-String ScrollContainer::get_configuration_warning() const {
-	String warning = Container::get_configuration_warning();
+TypedArray<String> ScrollContainer::get_configuration_warnings() const {
+	TypedArray<String> warnings = Container::get_configuration_warnings();
 
 	int found = 0;
 
@@ -565,12 +565,10 @@ String ScrollContainer::get_configuration_warning() const {
 	}
 
 	if (found != 1) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("ScrollContainer is intended to work with a single child control.\nUse a container as child (VBox, HBox, etc.), or a Control and set the custom minimum size manually.");
+		warnings.push_back(TTR("ScrollContainer is intended to work with a single child control.\nUse a container as child (VBox, HBox, etc.), or a Control and set the custom minimum size manually."));
 	}
-	return warning;
+
+	return warnings;
 }
 
 HScrollBar *ScrollContainer::get_h_scrollbar() {
diff --git a/scene/gui/scroll_container.h b/scene/gui/scroll_container.h
index 9d3ce39345..e7d73bab0a 100644
--- a/scene/gui/scroll_container.h
+++ b/scene/gui/scroll_container.h
@@ -103,7 +103,7 @@ public:
 
 	virtual bool clips_input() const override;
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	ScrollContainer();
 };
diff --git a/scene/gui/text_edit.cpp b/scene/gui/text_edit.cpp
index f54ab004c6..281e6adada 100644
--- a/scene/gui/text_edit.cpp
+++ b/scene/gui/text_edit.cpp
@@ -1973,7 +1973,7 @@ void TextEdit::backspace_at_cursor() {
 		}
 	}
 
-	cursor_set_line(prev_line, true, true);
+	cursor_set_line(prev_line, false, true);
 	cursor_set_column(prev_column);
 }
 
@@ -2207,7 +2207,7 @@ void TextEdit::_new_line(bool p_split_current_line, bool p_above) {
 	if (!p_split_current_line) {
 		if (p_above) {
 			if (cursor.line > 0) {
-				cursor_set_line(cursor.line - 1);
+				cursor_set_line(cursor.line - 1, false);
 				cursor_set_column(text[cursor.line].length());
 			} else {
 				cursor_set_column(0);
@@ -2223,7 +2223,7 @@ void TextEdit::_new_line(bool p_split_current_line, bool p_above) {
 	if (first_line) {
 		cursor_set_line(0);
 	} else if (brace_indent) {
-		cursor_set_line(cursor.line - 1);
+		cursor_set_line(cursor.line - 1, false);
 		cursor_set_column(text[cursor.line].length());
 	}
 	end_complex_operation();
@@ -2573,7 +2573,7 @@ void TextEdit::_backspace(bool p_word, bool p_all_to_left) {
 
 		_remove_text(line, column, cursor.line, cursor.column);
 
-		cursor_set_line(line);
+		cursor_set_line(line, false);
 		cursor_set_column(column);
 	} else {
 		// One character.
@@ -2640,7 +2640,7 @@ void TextEdit::_delete_selection() {
 		selection.active = false;
 		update();
 		_remove_text(selection.from_line, selection.from_column, selection.to_line, selection.to_column);
-		cursor_set_line(selection.from_line, true, false);
+		cursor_set_line(selection.from_line, false, false);
 		cursor_set_column(selection.from_column);
 		update();
 	}
@@ -3851,7 +3851,7 @@ void TextEdit::_insert_text_at_cursor(const String &p_text) {
 	int new_column, new_line;
 	_insert_text(cursor.line, cursor.column, p_text, &new_line, &new_column);
 	_update_scrollbars();
-	cursor_set_line(new_line);
+	cursor_set_line(new_line, false);
 	cursor_set_column(new_column);
 
 	update();
@@ -4425,7 +4425,7 @@ int TextEdit::get_column_x_offset_for_line(int p_char, int p_line) const {
 
 void TextEdit::insert_text_at_cursor(const String &p_text) {
 	if (selection.active) {
-		cursor_set_line(selection.from_line);
+		cursor_set_line(selection.from_line, false);
 		cursor_set_column(selection.from_column);
 
 		_remove_text(selection.from_line, selection.from_column, selection.to_line, selection.to_column);
@@ -5042,7 +5042,7 @@ void TextEdit::cut() {
 		DisplayServer::get_singleton()->clipboard_set(clipboard);
 
 		_remove_text(selection.from_line, selection.from_column, selection.to_line, selection.to_column);
-		cursor_set_line(selection.from_line); // Set afterwards else it causes the view to be offset.
+		cursor_set_line(selection.from_line, false); // Set afterwards else it causes the view to be offset.
 		cursor_set_column(selection.from_column);
 
 		selection.active = false;
@@ -5078,7 +5078,7 @@ void TextEdit::paste() {
 		selection.active = false;
 		selection.selecting_mode = SelectionMode::SELECTION_MODE_NONE;
 		_remove_text(selection.from_line, selection.from_column, selection.to_line, selection.to_column);
-		cursor_set_line(selection.from_line);
+		cursor_set_line(selection.from_line, false);
 		cursor_set_column(selection.from_column);
 
 	} else if (!cut_copy_line.is_empty() && cut_copy_line == clipboard) {
@@ -5817,11 +5817,11 @@ void TextEdit::undo() {
 
 	_update_scrollbars();
 	if (undo_stack_pos->get().type == TextOperation::TYPE_REMOVE) {
-		cursor_set_line(undo_stack_pos->get().to_line);
+		cursor_set_line(undo_stack_pos->get().to_line, false);
 		cursor_set_column(undo_stack_pos->get().to_column);
 		_cancel_code_hint();
 	} else {
-		cursor_set_line(undo_stack_pos->get().from_line);
+		cursor_set_line(undo_stack_pos->get().from_line, false);
 		cursor_set_column(undo_stack_pos->get().from_column);
 	}
 	update();
@@ -5856,7 +5856,7 @@ void TextEdit::redo() {
 	}
 
 	_update_scrollbars();
-	cursor_set_line(undo_stack_pos->get().to_line);
+	cursor_set_line(undo_stack_pos->get().to_line, false);
 	cursor_set_column(undo_stack_pos->get().to_column);
 	undo_stack_pos = undo_stack_pos->next();
 	update();
diff --git a/scene/main/node.cpp b/scene/main/node.cpp
index 27712242d1..b7313749d6 100644
--- a/scene/main/node.cpp
+++ b/scene/main/node.cpp
@@ -2634,15 +2634,27 @@ void Node::clear_internal_tree_resource_paths() {
 	}
 }
 
-String Node::get_configuration_warning() const {
+TypedArray<String> Node::get_configuration_warnings() const {
 	if (get_script_instance() && get_script_instance()->get_script().is_valid() &&
-			get_script_instance()->get_script()->is_tool() && get_script_instance()->has_method("_get_configuration_warning")) {
-		return get_script_instance()->call("_get_configuration_warning");
+			get_script_instance()->get_script()->is_tool() && get_script_instance()->has_method("_get_configuration_warnings")) {
+		return get_script_instance()->call("_get_configuration_warnings");
 	}
-	return String();
+	return Array();
 }
 
-void Node::update_configuration_warning() {
+String Node::get_configuration_warnings_as_string() const {
+	TypedArray<String> warnings = get_configuration_warnings();
+	String all_warnings = String();
+	for (int i = 0; i < warnings.size(); i++) {
+		if (i > 0) {
+			all_warnings += "\n\n";
+		}
+		all_warnings += String(warnings[i]);
+	}
+	return all_warnings;
+}
+
+void Node::update_configuration_warnings() {
 #ifdef TOOLS_ENABLED
 	if (!is_inside_tree()) {
 		return;
@@ -2798,7 +2810,7 @@ void Node::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("rset_unreliable", "property", "value"), &Node::rset_unreliable);
 	ClassDB::bind_method(D_METHOD("rset_unreliable_id", "peer_id", "property", "value"), &Node::rset_unreliable_id);
 
-	ClassDB::bind_method(D_METHOD("update_configuration_warning"), &Node::update_configuration_warning);
+	ClassDB::bind_method(D_METHOD("update_configuration_warnings"), &Node::update_configuration_warnings);
 
 	BIND_CONSTANT(NOTIFICATION_ENTER_TREE);
 	BIND_CONSTANT(NOTIFICATION_EXIT_TREE);
@@ -2874,7 +2886,7 @@ void Node::_bind_methods() {
 	BIND_VMETHOD(MethodInfo("_input", PropertyInfo(Variant::OBJECT, "event", PROPERTY_HINT_RESOURCE_TYPE, "InputEvent")));
 	BIND_VMETHOD(MethodInfo("_unhandled_input", PropertyInfo(Variant::OBJECT, "event", PROPERTY_HINT_RESOURCE_TYPE, "InputEvent")));
 	BIND_VMETHOD(MethodInfo("_unhandled_key_input", PropertyInfo(Variant::OBJECT, "event", PROPERTY_HINT_RESOURCE_TYPE, "InputEventKey")));
-	BIND_VMETHOD(MethodInfo(Variant::STRING, "_get_configuration_warning"));
+	BIND_VMETHOD(MethodInfo(PropertyInfo(Variant::ARRAY, "", PROPERTY_HINT_ARRAY_TYPE, "String"), "_get_configuration_warnings"));
 }
 
 String Node::_get_name_num_separator() {
diff --git a/scene/main/node.h b/scene/main/node.h
index b1e51d2aee..6ca2317d9e 100644
--- a/scene/main/node.h
+++ b/scene/main/node.h
@@ -412,9 +412,10 @@ public:
 
 	_FORCE_INLINE_ Viewport *get_viewport() const { return data.viewport; }
 
-	virtual String get_configuration_warning() const;
+	virtual TypedArray<String> get_configuration_warnings() const;
+	String get_configuration_warnings_as_string() const;
 
-	void update_configuration_warning();
+	void update_configuration_warnings();
 
 	void set_display_folded(bool p_folded);
 	bool is_displayed_folded() const;
diff --git a/scene/main/shader_globals_override.cpp b/scene/main/shader_globals_override.cpp
index b6b2982155..cb3b2cb392 100644
--- a/scene/main/shader_globals_override.cpp
+++ b/scene/main/shader_globals_override.cpp
@@ -232,7 +232,7 @@ void ShaderGlobalsOverride::_activate() {
 			}
 		}
 
-		update_configuration_warning(); //may have activated
+		update_configuration_warnings(); //may have activated
 	}
 }
 
@@ -260,17 +260,14 @@ void ShaderGlobalsOverride::_notification(int p_what) {
 	}
 }
 
-String ShaderGlobalsOverride::get_configuration_warning() const {
-	String warning = Node::get_configuration_warning();
+TypedArray<String> ShaderGlobalsOverride::get_configuration_warnings() const {
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (!active) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("ShaderGlobalsOverride is not active because another node of the same type is in the scene.");
+		warnings.push_back(TTR("ShaderGlobalsOverride is not active because another node of the same type is in the scene."));
 	}
 
-	return warning;
+	return warnings;
 }
 
 void ShaderGlobalsOverride::_bind_methods() {
diff --git a/scene/main/shader_globals_override.h b/scene/main/shader_globals_override.h
index 8d8794d465..2d9c3c76bd 100644
--- a/scene/main/shader_globals_override.h
+++ b/scene/main/shader_globals_override.h
@@ -58,7 +58,7 @@ protected:
 	static void _bind_methods();
 
 public:
-	String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	ShaderGlobalsOverride();
 };
diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp
index a3effef99a..4c9ebe016e 100644
--- a/scene/main/viewport.cpp
+++ b/scene/main/viewport.cpp
@@ -3175,20 +3175,17 @@ Variant Viewport::gui_get_drag_data() const {
 	return gui.drag_data;
 }
 
-String Viewport::get_configuration_warning() const {
+TypedArray<String> Viewport::get_configuration_warnings() const {
 	/*if (get_parent() && !Object::cast_to<Control>(get_parent()) && !render_target) {
 		return TTR("This viewport is not set as render target. If you intend for it to display its contents directly to the screen, make it a child of a Control so it can obtain a size. Otherwise, make it a RenderTarget and assign its internal texture to some node for display.");
 	}*/
 
-	String warning = Node::get_configuration_warning();
+	TypedArray<String> warnings = Node::get_configuration_warnings();
 
 	if (size.x == 0 || size.y == 0) {
-		if (!warning.is_empty()) {
-			warning += "\n\n";
-		}
-		warning += TTR("Viewport size must be greater than 0 to render anything.");
+		warnings.push_back(TTR("Viewport size must be greater than 0 to render anything."));
 	}
-	return warning;
+	return warnings;
 }
 
 void Viewport::gui_reset_canvas_sort_index() {
diff --git a/scene/main/viewport.h b/scene/main/viewport.h
index 8e79b50385..e8a88debf1 100644
--- a/scene/main/viewport.h
+++ b/scene/main/viewport.h
@@ -580,7 +580,7 @@ public:
 	void gui_reset_canvas_sort_index();
 	int gui_get_canvas_sort_index();
 
-	virtual String get_configuration_warning() const override;
+	TypedArray<String> get_configuration_warnings() const override;
 
 	void set_debug_draw(DebugDraw p_debug_draw);
 	DebugDraw get_debug_draw() const;
diff --git a/scene/resources/default_theme/default_theme.cpp b/scene/resources/default_theme/default_theme.cpp
index 85d097aa19..f05b43377f 100644
--- a/scene/resources/default_theme/default_theme.cpp
+++ b/scene/resources/default_theme/default_theme.cpp
@@ -891,6 +891,7 @@ void fill_default_theme(Ref<Theme> &theme, const Ref<Font> &default_font, const
 	theme->set_icon("preset_bg", "ColorPicker", make_icon(mini_checkerboard_png));
 	theme->set_icon("overbright_indicator", "ColorPicker", make_icon(overbright_indicator_png));
 	theme->set_icon("bar_arrow", "ColorPicker", make_icon(bar_arrow_png));
+	theme->set_icon("picker_cursor", "ColorPicker", make_icon(picker_cursor_png));
 
 	theme->set_icon("bg", "ColorPickerButton", make_icon(mini_checkerboard_png));
 
diff --git a/scene/resources/default_theme/picker_cursor.png b/scene/resources/default_theme/picker_cursor.png
new file mode 100644
index 0000000000..2f403492d2
--- /dev/null
+++ b/scene/resources/default_theme/picker_cursor.png
diff --git a/scene/resources/default_theme/theme_data.h b/scene/resources/default_theme/theme_data.h
index 5d4dbd0758..190f2a03d9 100644
--- a/scene/resources/default_theme/theme_data.h
+++ b/scene/resources/default_theme/theme_data.h
@@ -266,6 +266,10 @@ static const unsigned char panel_bg_png[] = {
 	0x89, 0x50, 0x4e, 0x47, 0xd, 0xa, 0x1a, 0xa, 0x0, 0x0, 0x0, 0xd, 0x49, 0x48, 0x44, 0x52, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x8, 0x1, 0x3, 0x0, 0x0, 0x0, 0xfe, 0xc1, 0x2c, 0xc8, 0x0, 0x0, 0x0, 0x6, 0x50, 0x4c, 0x54, 0x45, 0x25, 0x25, 0x2a, 0x35, 0x32, 0x3b, 0x4a, 0x73, 0x58, 0x4a, 0x0, 0x0, 0x0, 0xa, 0x49, 0x44, 0x41, 0x54, 0x78, 0xda, 0x63, 0x40, 0x3, 0x0, 0x0, 0x10, 0x0, 0x1, 0xb3, 0xac, 0xe2, 0xd0, 0x0, 0x0, 0x0, 0x0, 0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82
 };
 
+static const unsigned char picker_cursor_png[] = {
+	0x89, 0x50, 0x4e, 0x47, 0xd, 0xa, 0x1a, 0xa, 0x0, 0x0, 0x0, 0xd, 0x49, 0x48, 0x44, 0x52, 0x0, 0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0xc, 0x8, 0x4, 0x0, 0x0, 0x0, 0xfc, 0x7c, 0x94, 0x6c, 0x0, 0x0, 0x0, 0x9, 0x70, 0x48, 0x59, 0x73, 0x0, 0x0, 0xe, 0xc3, 0x0, 0x0, 0xe, 0xc3, 0x1, 0xc7, 0x6f, 0xa8, 0x64, 0x0, 0x0, 0x0, 0x19, 0x74, 0x45, 0x58, 0x74, 0x53, 0x6f, 0x66, 0x74, 0x77, 0x61, 0x72, 0x65, 0x0, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x6e, 0x6b, 0x73, 0x63, 0x61, 0x70, 0x65, 0x2e, 0x6f, 0x72, 0x67, 0x9b, 0xee, 0x3c, 0x1a, 0x0, 0x0, 0x0, 0xb9, 0x49, 0x44, 0x41, 0x54, 0x18, 0xd3, 0x6d, 0x8f, 0x3d, 0x8a, 0xc2, 0x50, 0x18, 0x45, 0xcf, 0x6b, 0x92, 0x2a, 0x19, 0xd4, 0xa4, 0x72, 0x47, 0x3, 0x42, 0xc0, 0x9f, 0x55, 0x44, 0x17, 0x24, 0x88, 0xee, 0x24, 0x53, 0x4d, 0x7e, 0xa, 0xbf, 0x94, 0xd6, 0x71, 0x5, 0xf2, 0x5e, 0x7f, 0x2d, 0xa2, 0xa2, 0xe0, 0x29, 0xef, 0xb9, 0xcd, 0x1, 0x40, 0xb1, 0x76, 0x6a, 0x14, 0x14, 0xd4, 0x68, 0xab, 0x98, 0x11, 0xcd, 0xd5, 0xef, 0x9b, 0xac, 0x27, 0x10, 0x32, 0x3b, 0xb4, 0x32, 0xcd, 0xc7, 0x77, 0xff, 0xfb, 0xc7, 0xc0, 0x92, 0x84, 0x84, 0x82, 0xcb, 0xa2, 0x92, 0x29, 0x46, 0xbb, 0x7d, 0xc3, 0xc0, 0x94, 0x27, 0x13, 0x86, 0x63, 0xa7, 0x12, 0xb5, 0x59, 0xcf, 0x8a, 0x77, 0xd6, 0xb9, 0xa9, 0x46, 0xde, 0x5, 0x92, 0xf, 0x91, 0x3a, 0x2f, 0xff, 0x4d, 0xfc, 0x38, 0xaf, 0x1b, 0x6a, 0x33, 0xa3, 0xf8, 0x10, 0x9b, 0xfc, 0xac, 0x1a, 0x6d, 0xf, 0x2d, 0x17, 0x26, 0xaf, 0x79, 0xc6, 0xf5, 0xd4, 0xa9, 0x44, 0xb1, 0x6c, 0x51, 0x31, 0xb0, 0x26, 0x25, 0x65, 0xc3, 0xb5, 0xa8, 0x64, 0x8a, 0xc6, 0x40, 0x3b, 0x76, 0xb9, 0xb9, 0xe0, 0x42, 0x7e, 0x3e, 0x75, 0x8f, 0x40, 0x0, 0x45, 0x2a, 0x55, 0xcb, 0xcb, 0xeb, 0x5f, 0xa5, 0x22, 0x80, 0x3b, 0xa0, 0x2c, 0x6c, 0xa1, 0x40, 0x2f, 0xda, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82
+};
+
 static const unsigned char popup_bg_png[] = {
 	0x89, 0x50, 0x4e, 0x47, 0xd, 0xa, 0x1a, 0xa, 0x0, 0x0, 0x0, 0xd, 0x49, 0x48, 0x44, 0x52, 0x0, 0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x10, 0x8, 0x3, 0x0, 0x0, 0x0, 0x28, 0x2d, 0xf, 0x53, 0x0, 0x0, 0x0, 0xa2, 0x50, 0x4c, 0x54, 0x45, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3b, 0x3b, 0x43, 0x42, 0x42, 0x4b, 0x3e, 0x3e, 0x47, 0x3e, 0x3e, 0x46, 0x41, 0x41, 0x4a, 0x0, 0x0, 0x0, 0x3d, 0x3d, 0x45, 0x3b, 0x3b, 0x43, 0x3a, 0x3a, 0x42, 0x38, 0x38, 0x41, 0x37, 0x37, 0x3e, 0x36, 0x36, 0x3d, 0x35, 0x35, 0x3c, 0x0, 0x0, 0x0, 0x38, 0x38, 0x40, 0x38, 0x38, 0x40, 0x31, 0x31, 0x38, 0x34, 0x34, 0x3b, 0x34, 0x34, 0x3b, 0x39, 0x39, 0x3f, 0x31, 0x31, 0x38, 0x2f, 0x2f, 0x36, 0x2d, 0x2d, 0x33, 0x2c, 0x2c, 0x32, 0x2b, 0x2b, 0x31, 0x2a, 0x2a, 0x31, 0x2a, 0x2a, 0x30, 0x29, 0x29, 0x30, 0x29, 0x29, 0x2f, 0x28, 0x28, 0x2e, 0x28, 0x28, 0x2d, 0x27, 0x27, 0x2d, 0x27, 0x27, 0x2c, 0x29, 0x29, 0x2e, 0x26, 0x26, 0x2c, 0x36, 0xc6, 0xc8, 0x93, 0x0, 0x0, 0x0, 0x28, 0x74, 0x52, 0x4e, 0x53, 0x0, 0x1, 0x3, 0x5, 0x8, 0xa, 0xb, 0x4, 0x13, 0x19, 0x1f, 0x22, 0x23, 0x16, 0x27, 0x35, 0x3f, 0x45, 0x46, 0x94, 0xf5, 0xfa, 0xfb, 0xf5, 0x40, 0xfc, 0xfb, 0xfb, 0xfb, 0xfb, 0xfc, 0xfc, 0x1a, 0xf5, 0xf6, 0x95, 0xfa, 0xfb, 0xf4, 0x94, 0x71, 0xda, 0xac, 0x92, 0x0, 0x0, 0x0, 0x7f, 0x49, 0x44, 0x41, 0x54, 0x78, 0xda, 0x65, 0x8f, 0x35, 0x82, 0xc3, 0x0, 0xc, 0x4, 0x77, 0x24, 0x85, 0xba, 0xe3, 0xff, 0xff, 0xee, 0xca, 0x74, 0x41, 0xdb, 0x32, 0xf3, 0x94, 0x82, 0x85, 0x10, 0x1d, 0x92, 0xb2, 0x3, 0x8e, 0x95, 0x77, 0x93, 0x6c, 0x28, 0xed, 0x15, 0x54, 0x67, 0xa6, 0x41, 0x3e, 0x8, 0x9c, 0xc3, 0xf4, 0xf2, 0xf6, 0x2a, 0x80, 0xf8, 0x44, 0x2d, 0x79, 0x2d, 0x20, 0xe0, 0x2, 0xa8, 0xc3, 0x2e, 0x6f, 0xc, 0x9e, 0x4c, 0x3c, 0x21, 0x4, 0xd8, 0xf0, 0x2, 0x28, 0x24, 0xcd, 0x3, 0xa9, 0x19, 0x64, 0xce, 0x83, 0x4c, 0x45, 0xe6, 0x69, 0x1a, 0xd8, 0xe9, 0x99, 0x96, 0x7f, 0x77, 0x37, 0x59, 0x83, 0xcc, 0xef, 0x7f, 0x89, 0x1f, 0x8e, 0xbf, 0x95, 0xd3, 0x1d, 0xf0, 0xff, 0x7a, 0x63, 0x7e, 0x86, 0xcb, 0x73, 0x8c, 0x5e, 0xee, 0xca, 0xb1, 0xad, 0x5f, 0x3, 0xaf, 0xdb, 0x49, 0x94, 0x4b, 0x90, 0x40, 0xdf, 0x0, 0x0, 0x0, 0x0, 0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82
 };
diff --git a/scene/resources/material.cpp b/scene/resources/material.cpp
index 0d02bde90d..5647856736 100644
--- a/scene/resources/material.cpp
+++ b/scene/resources/material.cpp
@@ -829,16 +829,26 @@ void BaseMaterial3D::_update_shader() {
 	}
 
 	if (flags[FLAG_UV1_USE_TRIPLANAR]) {
-		code += "\tuv1_power_normal=pow(abs(NORMAL),vec3(uv1_blend_sharpness));\n";
+		if (flags[FLAG_UV1_USE_WORLD_TRIPLANAR]) {
+			code += "\tuv1_power_normal=pow(abs(mat3(WORLD_MATRIX) * NORMAL),vec3(uv1_blend_sharpness));\n";
+			code += "\tuv1_triplanar_pos = (WORLD_MATRIX * vec4(VERTEX, 1.0f)).xyz * uv1_scale + uv1_offset;\n";
+		} else {
+			code += "\tuv1_power_normal=pow(abs(NORMAL),vec3(uv1_blend_sharpness));\n";
+			code += "\tuv1_triplanar_pos = VERTEX * uv1_scale + uv1_offset;\n";
+		}
 		code += "\tuv1_power_normal/=dot(uv1_power_normal,vec3(1.0));\n";
-		code += "\tuv1_triplanar_pos = VERTEX * uv1_scale + uv1_offset;\n";
 		code += "\tuv1_triplanar_pos *= vec3(1.0,-1.0, 1.0);\n";
 	}
 
 	if (flags[FLAG_UV2_USE_TRIPLANAR]) {
-		code += "\tuv2_power_normal=pow(abs(NORMAL), vec3(uv2_blend_sharpness));\n";
+		if (flags[FLAG_UV2_USE_WORLD_TRIPLANAR]) {
+			code += "\tuv2_power_normal=pow(abs(mat3(WORLD_MATRIX) * NORMAL), vec3(uv2_blend_sharpness));\n";
+			code += "\tuv2_triplanar_pos = (WORLD_MATRIX * vec4(VERTEX, 1.0f)).xyz * uv2_scale + uv2_offset;\n";
+		} else {
+			code += "\tuv2_power_normal=pow(abs(NORMAL), vec3(uv2_blend_sharpness));\n";
+			code += "\tuv2_triplanar_pos = VERTEX * uv2_scale + uv2_offset;\n";
+		}
 		code += "\tuv2_power_normal/=dot(uv2_power_normal,vec3(1.0));\n";
-		code += "\tuv2_triplanar_pos = VERTEX * uv2_scale + uv2_offset;\n";
 		code += "\tuv2_triplanar_pos *= vec3(1.0,-1.0, 1.0);\n";
 	}
 
diff --git a/servers/physics_3d/body_pair_3d_sw.cpp b/servers/physics_3d/body_pair_3d_sw.cpp
index 36114c0c91..ef4dca7dcf 100644
--- a/servers/physics_3d/body_pair_3d_sw.cpp
+++ b/servers/physics_3d/body_pair_3d_sw.cpp
@@ -645,7 +645,7 @@ bool BodySoftBodyPair3DSW::setup(real_t p_step) {
 		c.depth = depth;
 
 		Vector3 j_vec = c.normal * c.acc_normal_impulse + c.acc_tangent_impulse;
-		body->apply_impulse(c.rA + body->get_center_of_mass(), -j_vec);
+		body->apply_impulse(-j_vec, c.rA + body->get_center_of_mass());
 		soft_body->apply_node_impulse(c.index_B, j_vec);
 		c.acc_bias_impulse = 0;
 		c.acc_bias_impulse_center_of_mass = 0;
@@ -691,7 +691,7 @@ void BodySoftBodyPair3DSW::solve(real_t p_step) {
 
 			Vector3 jb = c.normal * (c.acc_bias_impulse - jbnOld);
 
-			body->apply_bias_impulse(c.rA + body->get_center_of_mass(), -jb, MAX_BIAS_ROTATION / p_step);
+			body->apply_bias_impulse(-jb, c.rA + body->get_center_of_mass(), MAX_BIAS_ROTATION / p_step);
 			soft_body->apply_node_bias_impulse(c.index_B, jb);
 
 			crbA = body->get_biased_angular_velocity().cross(c.rA);
@@ -706,8 +706,8 @@ void BodySoftBodyPair3DSW::solve(real_t p_step) {
 
 				Vector3 jb_com = c.normal * (c.acc_bias_impulse_center_of_mass - jbnOld_com);
 
-				body->apply_bias_impulse(body->get_center_of_mass(), -jb_com, 0.0f);
-				soft_body->apply_node_bias_impulse(c.index_B, -jb_com);
+				body->apply_bias_impulse(-jb_com, body->get_center_of_mass(), 0.0f);
+				soft_body->apply_node_bias_impulse(c.index_B, jb_com);
 			}
 
 			c.active = true;
@@ -726,7 +726,7 @@ void BodySoftBodyPair3DSW::solve(real_t p_step) {
 
 			Vector3 j = c.normal * (c.acc_normal_impulse - jnOld);
 
-			body->apply_impulse(c.rA + body->get_center_of_mass(), -j);
+			body->apply_impulse(-j, c.rA + body->get_center_of_mass());
 			soft_body->apply_node_impulse(c.index_B, j);
 
 			c.active = true;
@@ -767,7 +767,7 @@ void BodySoftBodyPair3DSW::solve(real_t p_step) {
 
 			jt = c.acc_tangent_impulse - jtOld;
 
-			body->apply_impulse(c.rA + body->get_center_of_mass(), -jt);
+			body->apply_impulse(-jt, c.rA + body->get_center_of_mass());
 			soft_body->apply_node_impulse(c.index_B, jt);
 
 			c.active = true;
diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
index cdff3139eb..bcdefea567 100644
--- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
+++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
@@ -941,7 +941,7 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con
 					uses_lightmap = true;
 				}
 
-			} else if (!low_end) {
+			} else {
 				if (p_using_opaque_gi) {
 					flags |= INSTANCE_DATA_FLAG_USE_GI_BUFFERS;
 				}
@@ -1133,7 +1133,7 @@ void RenderForwardClustered::_render_scene(RID p_render_buffer, const Transform
 
 		opaque_framebuffer = render_buffer->color_fb;
 
-		if (!low_end && p_gi_probes.size() > 0) {
+		if (p_gi_probes.size() > 0) {
 			using_giprobe = true;
 		}
 
@@ -1212,7 +1212,7 @@ void RenderForwardClustered::_render_scene(RID p_render_buffer, const Transform
 
 	RD::get_singleton()->draw_command_end_label();
 
-	bool using_sss = !low_end && render_buffer && scene_state.used_sss && sub_surface_scattering_get_quality() != RS::SUB_SURFACE_SCATTERING_QUALITY_DISABLED;
+	bool using_sss = render_buffer && scene_state.used_sss && sub_surface_scattering_get_quality() != RS::SUB_SURFACE_SCATTERING_QUALITY_DISABLED;
 
 	if (using_sss) {
 		using_separate_specular = true;
@@ -1296,7 +1296,7 @@ void RenderForwardClustered::_render_scene(RID p_render_buffer, const Transform
 
 	bool debug_giprobes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_ALBEDO || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION;
 	bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES;
-	bool depth_pre_pass = !low_end && depth_framebuffer.is_valid();
+	bool depth_pre_pass = depth_framebuffer.is_valid();
 
 	bool using_ssao = depth_pre_pass && p_render_buffer.is_valid() && p_environment.is_valid() && environment_is_ssao_enabled(p_environment);
 	bool continue_depth = false;
@@ -1903,7 +1903,7 @@ void RenderForwardClustered::_update_render_base_uniform_set() {
 			uniforms.push_back(u);
 		}
 
-		if (!low_end) {
+		{
 			RD::Uniform u;
 			u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
 			u.binding = 13;
@@ -2065,7 +2065,7 @@ RID RenderForwardClustered::_setup_render_pass_uniform_set(RenderListType p_rend
 		uniforms.push_back(u);
 	}
 
-	if (!low_end) {
+	{
 		{
 			RD::Uniform u;
 			u.binding = 11;
@@ -2616,7 +2616,7 @@ void RenderForwardClustered::_geometry_instance_update(GeometryInstance *p_geome
 	ginstance->store_transform_cache = store_transform;
 	ginstance->can_sdfgi = false;
 
-	if (!lightmap_instance_is_valid(ginstance->lightmap_instance) && !low_end) {
+	if (!lightmap_instance_is_valid(ginstance->lightmap_instance)) {
 		if (ginstance->gi_probes[0].is_null() && (ginstance->data->use_baked_light || ginstance->data->use_dynamic_gi)) {
 			ginstance->can_sdfgi = true;
 		}
@@ -2843,10 +2843,6 @@ RenderForwardClustered::RenderForwardClustered(RendererStorageRD *p_storage) :
 
 	{
 		String defines;
-		if (low_end) {
-			defines += "\n#define LOW_END_MODE \n";
-		}
-
 		defines += "\n#define MAX_ROUGHNESS_LOD " + itos(get_roughness_layers() - 1) + ".0\n";
 		if (is_using_radiance_cubemap_array()) {
 			defines += "\n#define USE_RADIANCE_CUBEMAP_ARRAY \n";
@@ -2856,7 +2852,7 @@ RenderForwardClustered::RenderForwardClustered(RendererStorageRD *p_storage) :
 
 		{
 			//lightmaps
-			scene_state.max_lightmaps = low_end ? 2 : MAX_LIGHTMAPS;
+			scene_state.max_lightmaps = MAX_LIGHTMAPS;
 			defines += "\n#define MAX_LIGHTMAP_TEXTURES " + itos(scene_state.max_lightmaps) + "\n";
 			defines += "\n#define MAX_LIGHTMAPS " + itos(scene_state.max_lightmaps) + "\n";
 
@@ -2872,7 +2868,7 @@ RenderForwardClustered::RenderForwardClustered(RendererStorageRD *p_storage) :
 			defines += "\n#define MATERIAL_UNIFORM_SET " + itos(MATERIAL_UNIFORM_SET) + "\n";
 		}
 
-		scene_shader.init(p_storage, defines, low_end);
+		scene_shader.init(p_storage, defines);
 	}
 
 	render_list_thread_threshold = GLOBAL_GET("rendering/limits/forward_renderer/threaded_render_minimum_instances");
diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp
index cb8c6e0cf3..45f6384b5e 100644
--- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp
+++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp
@@ -544,7 +544,7 @@ SceneShaderForwardClustered::~SceneShaderForwardClustered() {
 	storage->free(default_material);
 }
 
-void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const String p_defines, bool p_is_low_end) {
+void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const String p_defines) {
 	storage = p_storage;
 
 	{
@@ -562,6 +562,7 @@ void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const Strin
 		shader_versions.push_back("\n#define MODE_MULTIPLE_RENDER_TARGETS\n#define USE_LIGHTMAP\n");
 		shader.initialize(shader_versions, p_defines);
 
+		/*
 		if (p_is_low_end) {
 			//disable the high end versions
 			shader.set_variant_enabled(SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS, false);
@@ -571,6 +572,7 @@ void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const Strin
 			shader.set_variant_enabled(SHADER_VERSION_COLOR_PASS_WITH_SEPARATE_SPECULAR, false);
 			shader.set_variant_enabled(SHADER_VERSION_LIGHTMAP_COLOR_PASS_WITH_SEPARATE_SPECULAR, false);
 		}
+		*/
 	}
 
 	storage->shader_set_data_request_function(RendererStorageRD::SHADER_TYPE_3D, _create_shader_funcs);
@@ -764,9 +766,7 @@ void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const Strin
 
 		MaterialData *md = (MaterialData *)storage->material_get_data(default_material, RendererStorageRD::SHADER_TYPE_3D);
 		default_shader_rd = shader.version_get_shader(md->shader_data->version, SHADER_VERSION_COLOR_PASS);
-		if (!p_is_low_end) {
-			default_shader_sdfgi_rd = shader.version_get_shader(md->shader_data->version, SHADER_VERSION_DEPTH_PASS_WITH_SDF);
-		}
+		default_shader_sdfgi_rd = shader.version_get_shader(md->shader_data->version, SHADER_VERSION_DEPTH_PASS_WITH_SDF);
 	}
 
 	{
diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h
index 368340e258..953a5291c8 100644
--- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h
+++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h
@@ -203,7 +203,7 @@ public:
 	SceneShaderForwardClustered();
 	~SceneShaderForwardClustered();
 
-	void init(RendererStorageRD *p_storage, const String p_defines, bool p_is_low_end);
+	void init(RendererStorageRD *p_storage, const String p_defines);
 };
 
 } // namespace RendererSceneRenderImplementation
diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
index 4c5bded2ff..ca9e014c95 100644
--- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
+++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
@@ -317,7 +317,7 @@ void RendererSceneRenderRD::environment_set_sdfgi(RID p_env, bool p_enable, RS::
 	RendererSceneEnvironmentRD *env = environment_owner.getornull(p_env);
 	ERR_FAIL_COND(!env);
 
-	if (low_end) {
+	if (!is_dynamic_gi_supported()) {
 		return;
 	}
 
@@ -379,7 +379,7 @@ void RendererSceneRenderRD::environment_set_volumetric_fog(RID p_env, bool p_ena
 	RendererSceneEnvironmentRD *env = environment_owner.getornull(p_env);
 	ERR_FAIL_COND(!env);
 
-	if (low_end) {
+	if (!is_volumetric_supported()) {
 		return;
 	}
 
@@ -410,10 +410,6 @@ void RendererSceneRenderRD::environment_set_ssr(RID p_env, bool p_enable, int p_
 	RendererSceneEnvironmentRD *env = environment_owner.getornull(p_env);
 	ERR_FAIL_COND(!env);
 
-	if (low_end) {
-		return;
-	}
-
 	env->set_ssr(p_enable, p_max_steps, p_fade_int, p_fade_out, p_depth_tolerance);
 }
 
@@ -429,10 +425,6 @@ void RendererSceneRenderRD::environment_set_ssao(RID p_env, bool p_enable, float
 	RendererSceneEnvironmentRD *env = environment_owner.getornull(p_env);
 	ERR_FAIL_COND(!env);
 
-	if (low_end) {
-		return;
-	}
-
 	env->set_ssao(p_enable, p_radius, p_intensity, p_power, p_detail, p_horizon, p_sharpness, p_light_affect, p_ao_channel_affect);
 }
 
@@ -1347,7 +1339,7 @@ void RendererSceneRenderRD::gi_probe_instance_set_transform_to_data(RID p_probe,
 }
 
 bool RendererSceneRenderRD::gi_probe_needs_update(RID p_probe) const {
-	if (low_end) {
+	if (!is_dynamic_gi_supported()) {
 		return false;
 	}
 
@@ -1355,7 +1347,7 @@ bool RendererSceneRenderRD::gi_probe_needs_update(RID p_probe) const {
 }
 
 void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_instances, const Vector<RID> &p_light_instances, const PagedArray<GeometryInstance *> &p_dynamic_objects) {
-	if (low_end) {
+	if (!is_dynamic_gi_supported()) {
 		return;
 	}
 
@@ -4091,11 +4083,6 @@ int RendererSceneRenderRD::get_max_directional_lights() const {
 	return cluster.max_directional_lights;
 }
 
-bool RendererSceneRenderRD::is_low_end() const {
-	// by default we switch this on this (may be ignored in some implementations)
-	return GLOBAL_GET("rendering/driver/rd_renderer/use_low_end_renderer");
-}
-
 bool RendererSceneRenderRD::is_dynamic_gi_supported() const {
 	// usable by default (unless low end = true)
 	return true;
@@ -4120,21 +4107,13 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) {
 	directional_shadow.size = GLOBAL_GET("rendering/shadows/directional_shadow/size");
 	directional_shadow.use_16_bits = GLOBAL_GET("rendering/shadows/directional_shadow/16_bits");
 
-	uint32_t textures_per_stage = RD::get_singleton()->limit_get(RD::LIMIT_MAX_TEXTURES_PER_SHADER_STAGE);
-
-	low_end = is_low_end();
-
-	if (textures_per_stage < 48) {
-		low_end = true;
-	}
-
 	/* SKY SHADER */
 
 	sky.init(storage);
 
 	/* GI */
 
-	if (!low_end && is_dynamic_gi_supported()) {
+	if (is_dynamic_gi_supported()) {
 		gi.init(storage, &sky);
 	}
 
@@ -4172,7 +4151,7 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) {
 		cluster.directional_light_buffer = RD::get_singleton()->uniform_buffer_create(directional_light_buffer_size);
 	}
 
-	if (!low_end && is_volumetric_supported()) {
+	if (is_volumetric_supported()) {
 		String defines = "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(cluster.max_directional_lights) + "\n";
 		Vector<String> volumetric_fog_modes;
 		volumetric_fog_modes.push_back("\n#define MODE_DENSITY\n");
@@ -4230,7 +4209,7 @@ RendererSceneRenderRD::~RendererSceneRenderRD() {
 		RD::get_singleton()->free(sky.sky_scene_state.uniform_set);
 	}
 
-	if (!low_end) {
+	if (is_dynamic_gi_supported()) {
 		gi.free();
 
 		volumetric_fog.shader.version_free(volumetric_fog.shader_version);
diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h
index 264c0e4276..884bf2a744 100644
--- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h
+++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h
@@ -51,7 +51,6 @@ protected:
 	RendererStorageRD *storage;
 	double time;
 	double time_step = 0;
-	bool low_end = false; // If true GI and Volumetric fog are disabled
 
 	struct RenderBufferData {
 		virtual void configure(RID p_color_buffer, RID p_depth_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa) = 0;
@@ -1190,8 +1189,6 @@ public:
 
 	void sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir);
 
-	virtual bool is_low_end() const;
-
 	virtual bool is_dynamic_gi_supported() const;
 	virtual bool is_clustered_enabled() const;
 	virtual bool is_volumetric_supported() const;
diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl
index 7b86dac143..76edec1cb6 100644
--- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl
+++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl
@@ -1735,8 +1735,6 @@ void sdfgi_process(uint cascade, vec3 cascade_pos, vec3 cam_pos, vec3 cam_normal
 
 #ifndef MODE_RENDER_DEPTH
 
-#ifndef LOW_END_MODE
-
 vec4 volumetric_fog_process(vec2 screen_uv, float z) {
 	vec3 fog_pos = vec3(screen_uv, z * scene_data.volumetric_fog_inv_length);
 	if (fog_pos.z < 0.0) {
@@ -1747,7 +1745,6 @@ vec4 volumetric_fog_process(vec2 screen_uv, float z) {
 
 	return texture(sampler3D(volumetric_fog_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), fog_pos);
 }
-#endif
 
 vec4 fog_process(vec3 vertex) {
 	vec3 fog_color = scene_data.fog_light_color;
@@ -2019,7 +2016,6 @@ FRAGMENT_SHADER_CODE
 		fog = fog_process(vertex);
 	}
 
-#ifndef LOW_END_MODE
 	if (scene_data.volumetric_fog_enabled) {
 		vec4 volumetric_fog = volumetric_fog_process(screen_uv, -vertex.z);
 		if (scene_data.fog_enabled) {
@@ -2037,7 +2033,6 @@ FRAGMENT_SHADER_CODE
 			fog = volumetric_fog;
 		}
 	}
-#endif //!LOW_END_MODE
 #endif //!CUSTOM_FOG_USED
 
 	uint fog_rg = packHalf2x16(fog.rg);
@@ -2377,7 +2372,7 @@ FRAGMENT_SHADER_CODE
 		specular_light = spec_accum.rgb;
 		ambient_light = amb_accum.rgb;
 	}
-#elif !defined(LOW_END_MODE)
+#else
 
 	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers
 
@@ -2412,13 +2407,11 @@ FRAGMENT_SHADER_CODE
 	}
 #endif
 
-#ifndef LOW_END_MODE
 	if (scene_data.ssao_enabled) {
 		float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r;
 		ao = min(ao, ssao);
 		ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect);
 	}
-#endif //LOW_END_MODE
 
 	{ // process reflections
 
diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl
index 4ea05c9ccc..e064a90ae0 100644
--- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl
+++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl
@@ -122,8 +122,6 @@ layout(set = 0, binding = 12, std430) restrict readonly buffer GlobalVariableDat
 }
 global_variables;
 
-#ifndef LOW_END_MODE
-
 struct SDFGIProbeCascadeData {
 	vec3 position;
 	float to_probe;
@@ -159,8 +157,6 @@ layout(set = 0, binding = 13, std140) uniform SDFGI {
 }
 sdfgi;
 
-#endif //LOW_END_MODE
-
 /* Set 2: Render Pass (changes per render pass) */
 
 layout(set = 1, binding = 0, std140) uniform SceneData {
@@ -280,9 +276,7 @@ layout(set = 1, binding = 5) uniform texture2D directional_shadow_atlas;
 
 layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES];
 
-#ifndef LOW_END_MOD
 layout(set = 1, binding = 7) uniform texture3D gi_probe_textures[MAX_GI_PROBES];
-#endif
 
 layout(set = 1, binding = 8, std430) buffer restrict readonly ClusterBuffer {
 	uint data[];
@@ -306,8 +300,6 @@ layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid;
 layout(set = 1, binding = 9) uniform texture2D depth_buffer;
 layout(set = 1, binding = 10) uniform texture2D color_buffer;
 
-#ifndef LOW_END_MODE
-
 layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer;
 layout(set = 1, binding = 12) uniform texture2D ao_buffer;
 layout(set = 1, binding = 13) uniform texture2D ambient_buffer;
@@ -338,8 +330,6 @@ gi_probes;
 
 layout(set = 1, binding = 18) uniform texture3D volumetric_fog_texture;
 
-#endif // LOW_END_MODE
-
 #endif
 
 /* Set 2 Skeleton & Instancing (can change per item) */
diff --git a/servers/rendering/renderer_scene_render.h b/servers/rendering/renderer_scene_render.h
index 1dea3580b6..9ca9574f6f 100644
--- a/servers/rendering/renderer_scene_render.h
+++ b/servers/rendering/renderer_scene_render.h
@@ -241,8 +241,6 @@ public:
 
 	virtual void sdfgi_set_debug_probe_select(const Vector3 &p_position, const Vector3 &p_dir) = 0;
 
-	virtual bool is_low_end() const = 0;
-
 	virtual void update() = 0;
 	virtual ~RendererSceneRender() {}
 };
diff --git a/tests/test_command_queue.h b/tests/test_command_queue.h
index b4fa63ad2b..2f0f62f5c8 100644
--- a/tests/test_command_queue.h
+++ b/tests/test_command_queue.h
@@ -31,14 +31,14 @@
 #ifndef TEST_COMMAND_QUEUE_H
 #define TEST_COMMAND_QUEUE_H
 
-#include "test_command_queue.h"
-
 #include "core/config/project_settings.h"
+#include "core/math/random_number_generator.h"
 #include "core/os/mutex.h"
 #include "core/os/os.h"
 #include "core/os/semaphore.h"
 #include "core/os/thread.h"
 #include "core/templates/command_queue_mt.h"
+#include "test_macros.h"
 
 #if !defined(NO_THREADS)
 
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 6dca29e856..cacc0275dd 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -86,21 +86,19 @@ It is still possible to build against a system wide ENet but doing so
 will limit its functionality to IPv4 only.
 
 
-## etc2comp
+## etcpak
 
-- Upstream: https://github.com/google/etc2comp
-- Version: git (9cd0f9cae0f32338943699bb418107db61bb66f2, 2017)
-- License: Apache 2.0
-
-Files extracted from upstream source:
+- Upstream: https://github.com/wolfpld/etcpak
+- Version: git (403d38b3f1cb347c196d845d0a05e44a00d17169, 2021)
+- License: BSD-3-Clause
 
-- all .cpp and .h files in EtcLib/
-- README.md, LICENSE, AUTHORS
+Important: Some Godot-made changes, see `patches` folders.
 
-Important: Some files have Godot-made changes.
-They are marked with `// -- GODOT start --` and `// -- GODOT end --`
-comments.
+Files extracted from upstream source:
 
+- All `.cpp` and `.hpp` files in the root folder except `Application.cpp`.
+- `lz4` folder.
+- `AUTHORS.txt` and `LICENSE.txt`
 
 ## fonts
 
diff --git a/thirdparty/etc2comp/AUTHORS b/thirdparty/etc2comp/AUTHORS
deleted file mode 100644
index e78a7f4d21..0000000000
--- a/thirdparty/etc2comp/AUTHORS
+++ /dev/null
@@ -1,7 +0,0 @@
-# This is the list of Etc2Comp authors for copyright purposes.
-#
-# This does not necessarily list everyone who has contributed code, since in
-# some cases, their employer may be the copyright holder.  To see the full list
-# of contributors, see the revision history in source control.
-Google Inc.
-Blue Shift Inc.
diff --git a/thirdparty/etc2comp/Etc.cpp b/thirdparty/etc2comp/Etc.cpp
deleted file mode 100644
index a5ee706048..0000000000
--- a/thirdparty/etc2comp/Etc.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "EtcConfig.h"
-#include "Etc.h"
-#include "EtcFilter.h"
-
-#include <string.h>
-
-namespace Etc
-{
-	// ----------------------------------------------------------------------------------------------------
-	// C-style inteface to the encoder
-	//
-	void Encode(float *a_pafSourceRGBA,
-				unsigned int a_uiSourceWidth, 
-				unsigned int a_uiSourceHeight,
-				Image::Format a_format,
-				ErrorMetric a_eErrMetric,
-				float a_fEffort,
-				unsigned int a_uiJobs,
-				unsigned int a_uiMaxJobs,
-				unsigned char **a_ppaucEncodingBits,
-				unsigned int *a_puiEncodingBitsBytes,
-				unsigned int *a_puiExtendedWidth,
-				unsigned int *a_puiExtendedHeight, 
-				int *a_piEncodingTime_ms, bool a_bVerboseOutput)
-	{
-
-		Image image(a_pafSourceRGBA, a_uiSourceWidth,
-					a_uiSourceHeight,
-					a_eErrMetric);
-		image.m_bVerboseOutput = a_bVerboseOutput;
-		image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs);
-
-		*a_ppaucEncodingBits = image.GetEncodingBits();
-		*a_puiEncodingBitsBytes = image.GetEncodingBitsBytes();
-		*a_puiExtendedWidth = image.GetExtendedWidth();
-		*a_puiExtendedHeight = image.GetExtendedHeight();
-		*a_piEncodingTime_ms = image.GetEncodingTimeMs();
-	}
-
-	void EncodeMipmaps(float *a_pafSourceRGBA,
-		unsigned int a_uiSourceWidth,
-		unsigned int a_uiSourceHeight,
-		Image::Format a_format,
-		ErrorMetric a_eErrMetric,
-		float a_fEffort,
-		unsigned int a_uiJobs,
-		unsigned int a_uiMaxJobs,
-		unsigned int a_uiMaxMipmaps,
-		unsigned int a_uiMipFilterFlags,
-		RawImage* a_pMipmapImages,
-		int *a_piEncodingTime_ms, 
-		bool a_bVerboseOutput)
-	{
-		auto mipWidth = a_uiSourceWidth;
-		auto mipHeight = a_uiSourceHeight;
-		int totalEncodingTime = 0;
-		for(unsigned int mip = 0; mip < a_uiMaxMipmaps && mipWidth >= 1 && mipHeight >= 1; mip++)
-		{
-			float* pImageData = nullptr;
-			float* pMipImage = nullptr;
-
-			if(mip == 0)
-			{
-				pImageData = a_pafSourceRGBA;
-			}
-			else
-			{
-				pMipImage = new float[mipWidth*mipHeight*4];
-				if(FilterTwoPass(a_pafSourceRGBA, a_uiSourceWidth, a_uiSourceHeight, pMipImage, mipWidth, mipHeight, a_uiMipFilterFlags, Etc::FilterLanczos3) )
-				{
-					pImageData = pMipImage;
-				}
-			}
-
-			if ( pImageData )
-			{
-			
-				Image image(pImageData, mipWidth, mipHeight,	a_eErrMetric);
-
-			image.m_bVerboseOutput = a_bVerboseOutput;
-			image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs);
-
-			a_pMipmapImages[mip].paucEncodingBits = std::shared_ptr<unsigned char>(image.GetEncodingBits(), [](unsigned char *p) { delete[] p; });
-			a_pMipmapImages[mip].uiEncodingBitsBytes = image.GetEncodingBitsBytes();
-			a_pMipmapImages[mip].uiExtendedWidth = image.GetExtendedWidth();
-			a_pMipmapImages[mip].uiExtendedHeight = image.GetExtendedHeight();
-
-			totalEncodingTime += image.GetEncodingTimeMs();
-			}
-
-			if(pMipImage)
-			{
-				delete[] pMipImage;
-			}
-
-			if (!pImageData)
-			{
-				break;
-			}
-
-			mipWidth >>= 1;
-			mipHeight >>= 1;
-		}
-
-		*a_piEncodingTime_ms = totalEncodingTime;
-	}
-
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-}
diff --git a/thirdparty/etc2comp/Etc.h b/thirdparty/etc2comp/Etc.h
deleted file mode 100644
index 439388d649..0000000000
--- a/thirdparty/etc2comp/Etc.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcConfig.h"
-#include "EtcImage.h"
-#include "EtcColor.h"
-#include "EtcErrorMetric.h"
-#include <memory>
-
-#define ETCCOMP_MIN_EFFORT_LEVEL (0.0f)
-#define ETCCOMP_DEFAULT_EFFORT_LEVEL (40.0f)
-#define ETCCOMP_MAX_EFFORT_LEVEL (100.0f)
-
-namespace Etc
-{
-	class Block4x4EncodingBits;
-
-	struct RawImage
-	{
-		int uiExtendedWidth;
-		int uiExtendedHeight;
-		unsigned int uiEncodingBitsBytes;
-		std::shared_ptr<unsigned char> paucEncodingBits;
-	};
-
-
-
-	// C-style inteface to the encoder
-	void Encode(float *a_pafSourceRGBA,
-				unsigned int a_uiSourceWidth,
-				unsigned int a_uiSourceHeight,
-				Image::Format a_format,
-				ErrorMetric a_eErrMetric,
-				float a_fEffort,
-				unsigned int a_uiJobs,
-				unsigned int a_uimaxJobs,
-				unsigned char **a_ppaucEncodingBits,
-				unsigned int *a_puiEncodingBitsBytes,
-				unsigned int *a_puiExtendedWidth,
-				unsigned int *a_puiExtendedHeight,
-				int *a_piEncodingTime_ms, bool a_bVerboseOutput = false);
-
-	void EncodeMipmaps(float *a_pafSourceRGBA,
-		unsigned int a_uiSourceWidth,
-		unsigned int a_uiSourceHeight,
-		Image::Format a_format,
-		ErrorMetric a_eErrMetric,
-		float a_fEffort,
-		unsigned int a_uiJobs,
-		unsigned int a_uiMaxJobs,
-		unsigned int a_uiMaxMipmaps,
-		unsigned int a_uiMipFilterFlags,
-		RawImage* a_pMipmaps,
-		int *a_piEncodingTime_ms, bool a_bVerboseOutput = false);
-
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4.cpp b/thirdparty/etc2comp/EtcBlock4x4.cpp
deleted file mode 100644
index 3082fe60db..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4.cpp
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* 
-EtcBlock4x4.cpp
-
-Implements the state associated with each 4x4 block of pixels in an image
-
-Source images that are not a multiple of 4x4 are extended to fill the Block4x4 using pixels with an 
-alpha of NAN
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcColor.h"
-#include "EtcImage.h"
-#include "EtcColorFloatRGBA.h"
-#include "EtcBlock4x4Encoding_RGB8.h"
-#include "EtcBlock4x4Encoding_RGBA8.h"
-#include "EtcBlock4x4Encoding_RGB8A1.h"
-#include "EtcBlock4x4Encoding_R11.h"
-#include "EtcBlock4x4Encoding_RG11.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-namespace Etc
-{
-	// ETC pixels are scanned vertically.  
-	// this mapping is for when someone wants to scan the ETC pixels horizontally
-	const unsigned int Block4x4::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4::Block4x4(void)
-	{
-		m_pimageSource = nullptr;
-		m_uiSourceH = 0;
-		m_uiSourceV = 0;
-
-		m_sourcealphamix = SourceAlphaMix::UNKNOWN;
-		m_boolBorderPixels = false;
-		m_boolPunchThroughPixels = false;
-
-		m_pencoding = nullptr;
-
-		m_errormetric = ErrorMetric::NUMERIC;
-
-	}
-	Block4x4::~Block4x4()
-	{
-		m_pimageSource = nullptr;
-		if (m_pencoding)
-		{
-			delete m_pencoding;
-			m_pencoding = nullptr;
-		}
-	}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding from a source image
-	// [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource
-	// a_paucEncodingBits is the place to store the final encoding
-	// a_errormetric is used for finding the best encoding
-	//
-	void Block4x4::InitFromSource(Image *a_pimageSource, 
-									unsigned int a_uiSourceH, unsigned int a_uiSourceV,
-									unsigned char *a_paucEncodingBits,
-									ErrorMetric a_errormetric)
-	{
-
-		Block4x4();
-
-		m_pimageSource = a_pimageSource;
-		m_uiSourceH = a_uiSourceH;
-		m_uiSourceV = a_uiSourceV;
-		m_errormetric = a_errormetric;
-
-		SetSourcePixels();
-
-		// set block encoder function
-		switch (m_pimageSource->GetFormat())
-		{
-		case Image::Format::ETC1:
-			m_pencoding = new Block4x4Encoding_ETC1;
-			break;
-
-		case Image::Format::RGB8:
-		case Image::Format::SRGB8:
-			m_pencoding = new Block4x4Encoding_RGB8;
-			break;
-
-		case Image::Format::RGBA8:
-		case Image::Format::SRGBA8:
-			if (a_errormetric == RGBX)
-			{
-				m_pencoding = new Block4x4Encoding_RGBA8;
-			}
-			else
-			{
-				switch (m_sourcealphamix)
-				{
-				case SourceAlphaMix::OPAQUE:
-					m_pencoding = new Block4x4Encoding_RGBA8_Opaque;
-					break;
-
-				case SourceAlphaMix::TRANSPARENT:
-					m_pencoding = new Block4x4Encoding_RGBA8_Transparent;
-					break;
-
-				case SourceAlphaMix::TRANSLUCENT:
-					m_pencoding = new Block4x4Encoding_RGBA8;
-					break;
-
-				default:
-					assert(0);
-					break;
-				}
-				break;
-			}
-			break;
-
-		case Image::Format::RGB8A1:
-		case Image::Format::SRGB8A1:
-			switch (m_sourcealphamix)
-			{
-			case SourceAlphaMix::OPAQUE:
-				m_pencoding = new Block4x4Encoding_RGB8A1_Opaque;
-				break;
-
-			case SourceAlphaMix::TRANSPARENT:
-				m_pencoding = new Block4x4Encoding_RGB8A1_Transparent;
-				break;
-
-			case SourceAlphaMix::TRANSLUCENT:
-				if (m_boolPunchThroughPixels)
-				{
-					m_pencoding = new Block4x4Encoding_RGB8A1;
-				}
-				else
-				{
-					m_pencoding = new Block4x4Encoding_RGB8A1_Opaque;
-				}
-				break;
-
-			default:
-				assert(0);
-				break;
-			}
-			break;
-
-		case Image::Format::R11:
-		case Image::Format::SIGNED_R11:
-			m_pencoding = new Block4x4Encoding_R11;
-			break;
-		case Image::Format::RG11:
-		case Image::Format::SIGNED_RG11:
-			m_pencoding = new Block4x4Encoding_RG11;
-			break;
-		default:
-			assert(0);
-			break;
-		}
-
-		m_pencoding->InitFromSource(this, m_afrgbaSource,
-									a_paucEncodingBits, a_errormetric);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization of encoding state from a prior encoding using encoding bits
-	// [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource
-	// a_paucEncodingBits is the place to read the prior encoding
-	// a_imageformat is used to determine how to interpret a_paucEncodingBits
-	// a_errormetric was used for the prior encoding
-	//
-	void Block4x4::InitFromEtcEncodingBits(Image::Format a_imageformat,
-											unsigned int a_uiSourceH, unsigned int a_uiSourceV,
-											unsigned char *a_paucEncodingBits,
-											Image *a_pimageSource,
-											ErrorMetric a_errormetric)
-	{
-		Block4x4();
-
-		m_pimageSource = a_pimageSource;
-		m_uiSourceH = a_uiSourceH;
-		m_uiSourceV = a_uiSourceV;
-		m_errormetric = a_errormetric;
-
-		SetSourcePixels();
-
-		// set block encoder function
-		switch (a_imageformat)
-		{
-		case Image::Format::ETC1:
-			m_pencoding = new Block4x4Encoding_ETC1;
-			break;
-
-		case Image::Format::RGB8:
-		case Image::Format::SRGB8:
-			m_pencoding = new Block4x4Encoding_RGB8;
-			break;
-
-		case Image::Format::RGBA8:
-		case Image::Format::SRGBA8:
-			m_pencoding = new Block4x4Encoding_RGBA8;
-			break;
-
-		case Image::Format::RGB8A1:
-		case Image::Format::SRGB8A1:
-			m_pencoding = new Block4x4Encoding_RGB8A1;
-			break;
-
-		case Image::Format::R11:
-		case Image::Format::SIGNED_R11:
-			m_pencoding = new Block4x4Encoding_R11;
-			break;
-		case Image::Format::RG11:
-		case Image::Format::SIGNED_RG11:
-			m_pencoding = new Block4x4Encoding_RG11;
-			break;
-		default:
-			assert(0);
-			break;
-		}
-
-		m_pencoding->InitFromEncodingBits(this, a_paucEncodingBits, m_afrgbaSource,
-										m_pimageSource->GetErrorMetric());
-
-	}
-	
-	// ----------------------------------------------------------------------------------------------------
-	// set source pixels from m_pimageSource
-	// set m_alphamix
-	//
-	void Block4x4::SetSourcePixels(void)
-	{
-
-		Image::Format imageformat = m_pimageSource->GetFormat();
-
-		// alpha census
-		unsigned int uiTransparentSourcePixels = 0;
-		unsigned int uiOpaqueSourcePixels = 0;
-
-		// copy source to consecutive memory locations
-		// convert from image horizontal scan to block vertical scan
-		unsigned int uiPixel = 0;
-		for (unsigned int uiBlockPixelH = 0; uiBlockPixelH < Block4x4::COLUMNS; uiBlockPixelH++)
-		{
-			unsigned int uiSourcePixelH = m_uiSourceH + uiBlockPixelH;
-
-			for (unsigned int uiBlockPixelV = 0; uiBlockPixelV < Block4x4::ROWS; uiBlockPixelV++)
-			{
-				unsigned int uiSourcePixelV = m_uiSourceV + uiBlockPixelV;
-
-				ColorFloatRGBA *pfrgbaSource = m_pimageSource->GetSourcePixel(uiSourcePixelH, uiSourcePixelV);
-
-				// if pixel extends beyond source image because of block padding
-				if (pfrgbaSource == nullptr)
-				{
-					m_afrgbaSource[uiPixel] = ColorFloatRGBA(0.0f, 0.0f, 0.0f, NAN);	// denotes border pixel
-					m_boolBorderPixels = true;
-					uiTransparentSourcePixels++;
-				}
-				else
-				{
-					//get teh current pixel data, and store some of the attributes
-					//before capping values to fit the encoder type
-					
-					m_afrgbaSource[uiPixel] = (*pfrgbaSource).ClampRGBA();
-
-					if (m_afrgbaSource[uiPixel].fA == 1.0f || m_errormetric == RGBX)
-					{
-						m_pimageSource->m_iNumOpaquePixels++;
-					}
-					else if (m_afrgbaSource[uiPixel].fA == 0.0f)
-					{
-						m_pimageSource->m_iNumTransparentPixels++;
-					}
-					else if(m_afrgbaSource[uiPixel].fA > 0.0f && m_afrgbaSource[uiPixel].fA < 1.0f)
-					{
-						m_pimageSource->m_iNumTranslucentPixels++;
-					}
-					else
-					{
-						m_pimageSource->m_numOutOfRangeValues.fA++;
-					}
-
-					if (m_afrgbaSource[uiPixel].fR != 0.0f)
-					{
-						m_pimageSource->m_numColorValues.fR++;
-						//make sure we are getting a float between 0-1
-						if (m_afrgbaSource[uiPixel].fR - 1.0f > 0.0f)
-						{
-							m_pimageSource->m_numOutOfRangeValues.fR++;
-						}
-					}
-
-					if (m_afrgbaSource[uiPixel].fG != 0.0f)
-					{
-						m_pimageSource->m_numColorValues.fG++;
-						if (m_afrgbaSource[uiPixel].fG - 1.0f > 0.0f)
-						{
-							m_pimageSource->m_numOutOfRangeValues.fG++;
-						}
-					}
-					if (m_afrgbaSource[uiPixel].fB != 0.0f)
-					{
-						m_pimageSource->m_numColorValues.fB++;
-						if (m_afrgbaSource[uiPixel].fB - 1.0f > 0.0f)
-						{
-							m_pimageSource->m_numOutOfRangeValues.fB++;
-						}
-					}
-					// for formats with no alpha, set source alpha to 1
-					if (imageformat == Image::Format::ETC1 ||
-						imageformat == Image::Format::RGB8 ||
-						imageformat == Image::Format::SRGB8)
-					{
-						m_afrgbaSource[uiPixel].fA = 1.0f;
-					}
-
-					if (imageformat == Image::Format::R11 ||
-						imageformat == Image::Format::SIGNED_R11)
-					{
-						m_afrgbaSource[uiPixel].fA = 1.0f;
-						m_afrgbaSource[uiPixel].fG = 0.0f;
-						m_afrgbaSource[uiPixel].fB = 0.0f;
-					}
-
-					if (imageformat == Image::Format::RG11 ||
-						imageformat == Image::Format::SIGNED_RG11)
-					{
-						m_afrgbaSource[uiPixel].fA = 1.0f;
-						m_afrgbaSource[uiPixel].fB = 0.0f;
-					}
-
-				
-					// for RGB8A1, set source alpha to 0.0 or 1.0
-					// set punch through flag
-					if (imageformat == Image::Format::RGB8A1 ||
-						imageformat == Image::Format::SRGB8A1)
-					{
-						if (m_afrgbaSource[uiPixel].fA >= 0.5f)
-						{
-							m_afrgbaSource[uiPixel].fA = 1.0f;
-						}
-						else
-						{
-							m_afrgbaSource[uiPixel].fA = 0.0f;
-							m_boolPunchThroughPixels = true;
-						}
-					}
-
-					if (m_afrgbaSource[uiPixel].fA == 1.0f || m_errormetric == RGBX)
-					{
-						uiOpaqueSourcePixels++;
-					}
-					else if (m_afrgbaSource[uiPixel].fA == 0.0f)
-					{
-						uiTransparentSourcePixels++;
-					}
-
-				}
-
-				uiPixel += 1;
-			}
-		}
-
-		if (uiOpaqueSourcePixels == PIXELS)
-		{
-			m_sourcealphamix = SourceAlphaMix::OPAQUE;
-		}
-		else if (uiTransparentSourcePixels == PIXELS)
-		{
-			m_sourcealphamix = SourceAlphaMix::TRANSPARENT;
-		}
-		else
-		{
-			m_sourcealphamix = SourceAlphaMix::TRANSLUCENT;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// return a name for the encoding mode
-	//
-	const char * Block4x4::GetEncodingModeName(void)
-	{
-
-		switch (m_pencoding->GetMode())
-		{
-		case Block4x4Encoding::MODE_ETC1:
-			return "ETC1";
-		case Block4x4Encoding::MODE_T:
-			return "T";
-		case Block4x4Encoding::MODE_H:
-			return "H";
-		case Block4x4Encoding::MODE_PLANAR:
-			return "PLANAR";
-		default:
-			return "???";
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4.h b/thirdparty/etc2comp/EtcBlock4x4.h
deleted file mode 100644
index 0fd30c598d..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColor.h"
-#include "EtcColorFloatRGBA.h"
-#include "EtcErrorMetric.h"
-#include "EtcImage.h"
-#include "EtcBlock4x4Encoding.h"
-
-namespace Etc
-{
-	class Block4x4EncodingBits;
-
-	class Block4x4
-	{
-	public:
-
-		static const unsigned int ROWS = 4;
-		static const unsigned int COLUMNS = 4;
-		static const unsigned int PIXELS = ROWS * COLUMNS;
-
-		// the alpha mix for a 4x4 block of pixels
-		enum class SourceAlphaMix
-		{
-			UNKNOWN,
-			//
-			OPAQUE,			// all 1.0
-			TRANSPARENT,	// all 0.0 or NAN
-			TRANSLUCENT		// not all opaque or transparent
-		};
-
-		typedef void (Block4x4::*EncoderFunctionPtr)(void);
-
-		Block4x4(void);
-		~Block4x4();
-		void InitFromSource(Image *a_pimageSource,
-							unsigned int a_uiSourceH,
-							unsigned int a_uiSourceV,
-							unsigned char *a_paucEncodingBits,
-							ErrorMetric a_errormetric);
-
-		void InitFromEtcEncodingBits(Image::Format a_imageformat,
-										unsigned int a_uiSourceH,
-										unsigned int a_uiSourceV,
-										unsigned char *a_paucEncodingBits,
-										Image *a_pimageSource,
-										ErrorMetric a_errormetric);
-
-		// return true if final iteration was performed
-		inline void PerformEncodingIteration(float a_fEffort)
-		{
-			m_pencoding->PerformIteration(a_fEffort);
-		}
-
-		inline void SetEncodingBitsFromEncoding(void)
-		{
-			m_pencoding->SetEncodingBits();
-		}
-
-		inline unsigned int GetSourceH(void)
-		{
-			return m_uiSourceH;
-		}
-
-		inline unsigned int GetSourceV(void)
-		{
-			return m_uiSourceV;
-		}
-
-		inline float GetError(void)
-		{
-			return m_pencoding->GetError();
-		}
-
-		static const unsigned int s_auiPixelOrderHScan[PIXELS];
-
-		inline ColorFloatRGBA * GetDecodedColors(void)
-		{
-			return m_pencoding->GetDecodedColors();
-		}
-
-		inline float * GetDecodedAlphas(void)
-		{
-			return m_pencoding->GetDecodedAlphas();
-		}
-
-		inline Block4x4Encoding::Mode GetEncodingMode(void)
-		{
-			return m_pencoding->GetMode();
-		}
-
-		inline bool GetFlip(void)
-		{
-			return m_pencoding->GetFlip();
-		}
-
-		inline bool IsDifferential(void)
-		{
-			return m_pencoding->IsDifferential();
-		}
-
-		inline ColorFloatRGBA * GetSource()
-		{
-			return m_afrgbaSource;
-		}
-
-		inline ErrorMetric GetErrorMetric()
-		{
-			return m_errormetric;
-		}
-
-		const char * GetEncodingModeName(void);
-
-		inline Block4x4Encoding * GetEncoding(void)
-		{
-			return m_pencoding;
-		}
-
-		inline SourceAlphaMix GetSourceAlphaMix(void)
-		{
-			return m_sourcealphamix;
-		}
-
-		inline Image * GetImageSource(void)
-		{
-			return m_pimageSource;
-		}
-
-		inline bool HasBorderPixels(void)
-		{
-			return m_boolBorderPixels;
-		}
-
-		inline bool HasPunchThroughPixels(void)
-		{
-			return m_boolPunchThroughPixels;
-		}
-
-	private:
-
-		void SetSourcePixels(void);
-
-		Image				*m_pimageSource;
-		unsigned int		m_uiSourceH;
-		unsigned int		m_uiSourceV;
-		ErrorMetric			m_errormetric;
-		ColorFloatRGBA		m_afrgbaSource[PIXELS];		// vertical scan
-
-		SourceAlphaMix		m_sourcealphamix;
-		bool				m_boolBorderPixels;			// marked as rgba(NAN, NAN, NAN, NAN)
-		bool				m_boolPunchThroughPixels;	// RGB8A1 or SRGB8A1 with any pixels with alpha < 0.5
-
-		Block4x4Encoding	*m_pencoding;
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding.cpp
deleted file mode 100644
index 7a9e68c4cf..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding.cpp
-
-Block4x4Encoding is the abstract base class for the different encoders.  Each encoder targets a 
-particular file format (e.g. ETC1, RGB8, RGBA8, R11)
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-namespace Etc
-{
-	// ----------------------------------------------------------------------------------------------------
-	//
-	const float Block4x4Encoding::LUMA_WEIGHT = 3.0f;
-	const float Block4x4Encoding::CHROMA_BLUE_WEIGHT = 0.5f;
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding::Block4x4Encoding(void)
-	{
-
-		m_pblockParent = nullptr;
-
-		m_pafrgbaSource = nullptr;
-
-		m_boolBorderPixels = false;
-
-		m_fError = -1.0f;
-
-		m_mode = MODE_UNKNOWN;
-
-		m_uiEncodingIterations = 0;
-		m_boolDone = false;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(-1.0f, -1.0f, -1.0f, -1.0f);
-			m_afDecodedAlphas[uiPixel] = -1.0f;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialize the generic encoding for a 4x4 block
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// init the decoded pixels to -1 to mark them as undefined
-	// init the error to -1 to mark it as undefined
-	//
-	void Block4x4Encoding::Init(Block4x4 *a_pblockParent,
-								ColorFloatRGBA *a_pafrgbaSource,
-								ErrorMetric a_errormetric)
-	{
-
-		m_pblockParent = a_pblockParent;
-
-		m_pafrgbaSource = a_pafrgbaSource;
-
-		m_boolBorderPixels = m_pblockParent->HasBorderPixels();
-
-		m_fError = -1.0f;
-
-		m_uiEncodingIterations = 0;
-
-		m_errormetric = a_errormetric;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(-1.0f, -1.0f, -1.0f, -1.0f);
-			m_afDecodedAlphas[uiPixel] = -1.0f;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate the error for the block by summing the pixel errors
-	//
-	void Block4x4Encoding::CalcBlockError(void)
-	{
-		m_fError = 0.0f;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_fError += CalcPixelError(m_afrgbaDecodedColors[uiPixel], m_afDecodedAlphas[uiPixel],
-										m_pafrgbaSource[uiPixel]);
-		}
-		
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate the error between the source pixel and the decoded pixel
-	// the error amount is base on the error metric
-	//
-	float Block4x4Encoding::CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha,
-											ColorFloatRGBA a_frgbaSourcePixel)
-	{
-
-		// if a border pixel
-		if (isnan(a_frgbaSourcePixel.fA))
-		{
-			return 0.0f;
-		}
-
-		if (m_errormetric == ErrorMetric::RGBA)
-		{
-			assert(a_fDecodedAlpha >= 0.0f);
-
-			float fDRed = (a_fDecodedAlpha * a_frgbaDecodedColor.fR) -
-							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fR);
-			float fDGreen = (a_fDecodedAlpha * a_frgbaDecodedColor.fG) -
-							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fG);
-			float fDBlue = (a_fDecodedAlpha * a_frgbaDecodedColor.fB) -
-							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fB);
-
-			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
-
-			return fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue + fDAlpha*fDAlpha;
-		}
-		else if (m_errormetric == ErrorMetric::RGBX)
-		{
-			assert(a_fDecodedAlpha >= 0.0f);
-
-			float fDRed = a_frgbaDecodedColor.fR - a_frgbaSourcePixel.fR;
-			float fDGreen = a_frgbaDecodedColor.fG - a_frgbaSourcePixel.fG;
-			float fDBlue = a_frgbaDecodedColor.fB - a_frgbaSourcePixel.fB;
-			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
-
-			return fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue + fDAlpha*fDAlpha;
-		}
-		else if (m_errormetric == ErrorMetric::REC709)
-		{
-			assert(a_fDecodedAlpha >= 0.0f);
-
-			float fLuma1 = a_frgbaSourcePixel.fR*0.2126f + a_frgbaSourcePixel.fG*0.7152f + a_frgbaSourcePixel.fB*0.0722f;
-			float fChromaR1 = 0.5f * ((a_frgbaSourcePixel.fR - fLuma1) * (1.0f / (1.0f - 0.2126f)));
-			float fChromaB1 = 0.5f * ((a_frgbaSourcePixel.fB - fLuma1) * (1.0f / (1.0f - 0.0722f)));
-
-			float fLuma2 = a_frgbaDecodedColor.fR*0.2126f +
-							a_frgbaDecodedColor.fG*0.7152f +
-							a_frgbaDecodedColor.fB*0.0722f;
-			float fChromaR2 = 0.5f * ((a_frgbaDecodedColor.fR - fLuma2) * (1.0f / (1.0f - 0.2126f)));
-			float fChromaB2 = 0.5f * ((a_frgbaDecodedColor.fB - fLuma2) * (1.0f / (1.0f - 0.0722f)));
-
-			float fDeltaL = a_frgbaSourcePixel.fA * fLuma1 - a_fDecodedAlpha * fLuma2;
-			float fDeltaCr = a_frgbaSourcePixel.fA * fChromaR1 - a_fDecodedAlpha * fChromaR2;
-			float fDeltaCb = a_frgbaSourcePixel.fA * fChromaB1 - a_fDecodedAlpha * fChromaB2;
-
-			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
-
-			// Favor Luma accuracy over Chroma, and Red over Blue 
-			return LUMA_WEIGHT*fDeltaL*fDeltaL +
-					fDeltaCr*fDeltaCr +
-					CHROMA_BLUE_WEIGHT*fDeltaCb*fDeltaCb +
-					fDAlpha*fDAlpha;
-	#if 0
-			float fDRed = a_frgbaDecodedPixel.fR - a_frgbaSourcePixel.fR;
-			float fDGreen = a_frgbaDecodedPixel.fG - a_frgbaSourcePixel.fG;
-			float fDBlue = a_frgbaDecodedPixel.fB - a_frgbaSourcePixel.fB;
-			return 2.0f * 3.0f * fDeltaL * fDeltaL + fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue;
-#endif
-		}
-		else if (m_errormetric == ErrorMetric::NORMALXYZ)
-		{
-			float fDecodedX = 2.0f * a_frgbaDecodedColor.fR - 1.0f;
-			float fDecodedY = 2.0f * a_frgbaDecodedColor.fG - 1.0f;
-			float fDecodedZ = 2.0f * a_frgbaDecodedColor.fB - 1.0f;
-
-			float fDecodedLength = sqrtf(fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ);
-
-			if (fDecodedLength < 0.5f)
-			{
-				return 1.0f;
-			}
-			else if (fDecodedLength == 0.0f)
-			{
-				fDecodedX = 1.0f;
-				fDecodedY = 0.0f;
-				fDecodedZ = 0.0f;
-			}
-			else
-			{
-				fDecodedX /= fDecodedLength;
-				fDecodedY /= fDecodedLength;
-				fDecodedZ /= fDecodedLength;
-			}
-
-			float fSourceX = 2.0f * a_frgbaSourcePixel.fR - 1.0f;
-			float fSourceY = 2.0f * a_frgbaSourcePixel.fG - 1.0f;
-			float fSourceZ = 2.0f * a_frgbaSourcePixel.fB - 1.0f;
-
-			float fSourceLength = sqrtf(fSourceX*fSourceX + fSourceY*fSourceY + fSourceZ*fSourceZ);
-
-			if (fSourceLength == 0.0f)
-			{
-				fSourceX = 1.0f;
-				fSourceY = 0.0f;
-				fSourceZ = 0.0f;
-			}
-			else
-			{
-				fSourceX /= fSourceLength;
-				fSourceY /= fSourceLength;
-				fSourceZ /= fSourceLength;
-			}
-
-			float fDotProduct = fSourceX*fDecodedX + fSourceY*fDecodedY + fSourceZ*fDecodedZ;
-			float fNormalizedDotProduct = 1.0f - 0.5f * (fDotProduct + 1.0f);
-			float fDotProductError = fNormalizedDotProduct * fNormalizedDotProduct;
-			
-			float fLength2 = fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ;
-			float fLength2Error = fabsf(1.0f - fLength2);
-
-			float fDeltaW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA;
-			float fErrorW = fDeltaW * fDeltaW;
-
-			return fDotProductError + fLength2Error + fErrorW;
-		}
-		else // ErrorMetric::NUMERIC
-		{
-			assert(a_fDecodedAlpha >= 0.0f);
-
-			float fDX = a_frgbaDecodedColor.fR - a_frgbaSourcePixel.fR;
-			float fDY = a_frgbaDecodedColor.fG - a_frgbaSourcePixel.fG;
-			float fDZ = a_frgbaDecodedColor.fB - a_frgbaSourcePixel.fB;
-			float fDW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA;
-
-			return fDX*fDX + fDY*fDY + fDZ*fDZ + fDW*fDW;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
-
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding.h b/thirdparty/etc2comp/EtcBlock4x4Encoding.h
deleted file mode 100644
index c14c3b8616..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColorFloatRGBA.h"
-
-#include "EtcErrorMetric.h"
-
-#include <assert.h>
-#include <float.h>
-
-namespace Etc
-{
-	class Block4x4;
-
-	// abstract base class for specific encodings
-	class Block4x4Encoding
-	{
-	public:
-
-		static const unsigned int ROWS = 4;
-		static const unsigned int COLUMNS = 4;
-		static const unsigned int PIXELS = ROWS * COLUMNS;
-		static const float LUMA_WEIGHT;
-		static const float CHROMA_BLUE_WEIGHT;
-
-		typedef enum
-		{
-			MODE_UNKNOWN,
-			//
-			MODE_ETC1,
-			MODE_T,
-			MODE_H,
-			MODE_PLANAR,
-			MODE_R11,
-			MODE_RG11,
-			//
-			MODES
-		} Mode;
-
-		Block4x4Encoding(void);
-		//virtual ~Block4x4Encoding(void) =0;
-		virtual ~Block4x4Encoding(void) {}
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-									ColorFloatRGBA *a_pafrgbaSource,
-
-									unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) = 0;
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource,
-
-											ErrorMetric a_errormetric) = 0;
-
-		// perform an iteration of the encoding
-		// the first iteration must generate a complete, valid (if poor) encoding
-		virtual void PerformIteration(float a_fEffort) = 0;
-
-		void CalcBlockError(void);
-
-		inline float GetError(void)
-		{
-			assert(m_fError >= 0.0f);
-
-			return m_fError;
-		}
-
-		inline ColorFloatRGBA * GetDecodedColors(void)
-		{
-			return m_afrgbaDecodedColors;
-		}
-
-		inline float * GetDecodedAlphas(void)
-		{
-			return m_afDecodedAlphas;
-		}
-
-		virtual void SetEncodingBits(void) = 0;
-
-		virtual bool GetFlip(void) = 0;
-
-		virtual bool IsDifferential(void) = 0;
-
-		virtual bool HasSeverelyBentDifferentialColors(void) const = 0;
-
-		inline Mode GetMode(void)
-		{
-			return m_mode;
-		}
-
-		inline bool IsDone(void)
-		{
-			return m_boolDone;
-		}
-
-		inline void SetDoneIfPerfect()
-		{
-			if (GetError() == 0.0f)
-			{
-				m_boolDone = true;
-			}
-		}
-
-		float CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha,
-								ColorFloatRGBA a_frgbaSourcePixel);
-
-	protected:
-
-		void Init(Block4x4 *a_pblockParent,
-					ColorFloatRGBA *a_pafrgbaSource,
-
-					ErrorMetric a_errormetric);
-
-		Block4x4		*m_pblockParent;
-		ColorFloatRGBA	*m_pafrgbaSource;
-
-		bool			m_boolBorderPixels;				// if block has any border pixels
-
-		ColorFloatRGBA	m_afrgbaDecodedColors[PIXELS];	// decoded RGB components, ignore Alpha
-		float			m_afDecodedAlphas[PIXELS];		// decoded alpha component
-		float			m_fError;						// error for RGBA relative to m_pafrgbaSource
-
-		// intermediate encoding
-		Mode			m_mode;
-
-		unsigned int	m_uiEncodingIterations;
-		bool			m_boolDone;						// all iterations have been done
-		ErrorMetric		m_errormetric;
-
-	private:
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4EncodingBits.h b/thirdparty/etc2comp/EtcBlock4x4EncodingBits.h
deleted file mode 100644
index 4065700379..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4EncodingBits.h
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ################################################################################
-	// Block4x4EncodingBits
-	// Base class for Block4x4EncodingBits_XXXX
-	// ################################################################################
-
-	class Block4x4EncodingBits
-	{
-	public:
-
-		enum class Format
-		{
-			UNKNOWN,
-			//
-			RGB8,
-			RGBA8,
-			R11,
-			RG11,
-			RGB8A1,
-			//
-			FORMATS
-		};
-
-		static unsigned int GetBytesPerBlock(Format a_format)
-		{
-			switch (a_format)
-			{
-			case Format::RGB8:
-			case Format::R11:
-			case Format::RGB8A1:
-				return 8;
-				break;
-
-			case Format::RGBA8:
-			case Format::RG11:
-				return 16;
-				break;
-
-			default:
-				return 0;
-				break;
-			}
-
-		}
-
-	};
-
-	// ################################################################################
-	// Block4x4EncodingBits_RGB8
-	// Encoding bits for the RGB portion of ETC1, RGB8, RGB8A1 and RGBA8
-	// ################################################################################
-
-	class Block4x4EncodingBits_RGB8
-	{
-	public:
-
-		static const unsigned int BYTES_PER_BLOCK = 8;
-
-		inline Block4x4EncodingBits_RGB8(void)
-		{
-			assert(sizeof(Block4x4EncodingBits_RGB8) == BYTES_PER_BLOCK);
-
-			for (unsigned int uiByte = 0; uiByte < BYTES_PER_BLOCK; uiByte++)
-			{
-				auc[uiByte] = 0;
-			}
-
-		}
-
-		typedef struct
-		{
-			unsigned red2 : 4;
-			unsigned red1 : 4;
-			//
-			unsigned green2 : 4;
-			unsigned green1 : 4;
-			//
-			unsigned blue2 : 4;
-			unsigned blue1 : 4;
-			//
-			unsigned flip : 1;
-			unsigned diff : 1;
-			unsigned cw2 : 3;
-			unsigned cw1 : 3;
-			//
-			unsigned int selectors;
-		} Individual;
-
-		typedef struct
-		{
-			signed dred2 : 3;
-			unsigned red1 : 5;
-			//
-			signed dgreen2 : 3;
-			unsigned green1 : 5;
-			//
-			signed dblue2 : 3;
-			unsigned blue1 : 5;
-			//
-			unsigned flip : 1;
-			unsigned diff : 1;
-			unsigned cw2 : 3;
-			unsigned cw1 : 3;
-			//
-			unsigned int selectors;
-		} Differential;
-
-		typedef struct
-		{
-			unsigned red1b : 2;
-			unsigned detect2 : 1;
-			unsigned red1a : 2;
-			unsigned detect1 : 3;
-			//
-			unsigned blue1 : 4;
-			unsigned green1 : 4;
-			//
-			unsigned green2 : 4;
-			unsigned red2 : 4;
-			//
-			unsigned db : 1;
-			unsigned diff : 1;
-			unsigned da : 2;
-			unsigned blue2 : 4;
-			//
-			unsigned int selectors;
-		} T;
-
-		typedef struct
-		{
-			unsigned green1a : 3;
-			unsigned red1 : 4;
-			unsigned detect1 : 1;
-			//
-			unsigned blue1b : 2;
-			unsigned detect3 : 1;
-			unsigned blue1a : 1;
-			unsigned green1b : 1;
-			unsigned detect2 : 3;
-			//
-			unsigned green2a : 3;
-			unsigned red2 : 4;
-			unsigned blue1c : 1;
-			//
-			unsigned db : 1;
-			unsigned diff : 1;
-			unsigned da : 1;
-			unsigned blue2 : 4;
-			unsigned green2b : 1;
-			//
-			unsigned int selectors;
-		} H;
-
-		typedef struct
-		{
-			unsigned originGreen1 : 1;
-			unsigned originRed : 6;
-			unsigned detect1 : 1;
-			//
-			unsigned originBlue1 : 1;
-			unsigned originGreen2 : 6;
-			unsigned detect2 : 1;
-			//
-			unsigned originBlue3 : 2;
-			unsigned detect4 : 1;
-			unsigned originBlue2 : 2;
-			unsigned detect3 : 3;
-			//
-			unsigned horizRed2 : 1;
-			unsigned diff : 1;
-			unsigned horizRed1 : 5;
-			unsigned originBlue4 : 1;
-			//
-			unsigned horizBlue1: 1;
-			unsigned horizGreen : 7;
-			//
-			unsigned vertRed1 : 3;
-			unsigned horizBlue2 : 5;
-			//
-			unsigned vertGreen1 : 5;
-			unsigned vertRed2 : 3;
-			//
-			unsigned vertBlue : 6;
-			unsigned vertGreen2 : 2;
-		} Planar;
-
-		union
-		{
-			unsigned char auc[BYTES_PER_BLOCK];
-			unsigned long int ul;
-			Individual individual;
-			Differential differential;
-			T t;
-			H h;
-			Planar planar;
-		};
-
-	};
-
-	// ################################################################################
-	// Block4x4EncodingBits_A8
-	// Encoding bits for the A portion of RGBA8
-	// ################################################################################
-
-	class Block4x4EncodingBits_A8
-	{
-	public:
-
-		static const unsigned int BYTES_PER_BLOCK = 8;
-		static const unsigned int SELECTOR_BYTES = 6;
-
-		typedef struct
-		{
-			unsigned base : 8;
-			unsigned table : 4;
-			unsigned multiplier : 4;
-			unsigned selectors0 : 8;
-			unsigned selectors1 : 8;
-			unsigned selectors2 : 8;
-			unsigned selectors3 : 8;
-			unsigned selectors4 : 8;
-			unsigned selectors5 : 8;
-		} Data;
-
-		Data data;
-
-	};
-
-	// ################################################################################
-	// Block4x4EncodingBits_R11
-	// Encoding bits for the R portion of R11
-	// ################################################################################
-
-	class Block4x4EncodingBits_R11
-	{
-	public:
-
-		static const unsigned int BYTES_PER_BLOCK = 8;
-		static const unsigned int SELECTOR_BYTES = 6;
-
-		typedef struct
-		{
-			unsigned base : 8;
-			unsigned table : 4;
-			unsigned multiplier : 4;
-			unsigned selectors0 : 8;
-			unsigned selectors1 : 8;
-			unsigned selectors2 : 8;
-			unsigned selectors3 : 8;
-			unsigned selectors4 : 8;
-			unsigned selectors5 : 8;
-		} Data;
-
-		Data data;
-
-	};
-
-	class Block4x4EncodingBits_RG11
-	{
-	public:
-
-		static const unsigned int BYTES_PER_BLOCK = 16;
-		static const unsigned int SELECTOR_BYTES = 12;
-
-		typedef struct
-		{
-			//Red portion
-			unsigned baseR : 8;
-			unsigned tableIndexR : 4;
-			unsigned multiplierR : 4;
-			unsigned selectorsR0 : 8;
-			unsigned selectorsR1 : 8;
-			unsigned selectorsR2 : 8;
-			unsigned selectorsR3 : 8;
-			unsigned selectorsR4 : 8;
-			unsigned selectorsR5 : 8;
-			//Green portion
-			unsigned baseG : 8;
-			unsigned tableIndexG : 4;
-			unsigned multiplierG : 4;
-			unsigned selectorsG0 : 8;
-			unsigned selectorsG1 : 8;
-			unsigned selectorsG2 : 8;
-			unsigned selectorsG3 : 8;
-			unsigned selectorsG4 : 8;
-			unsigned selectorsG5 : 8;
-		} Data;
-
-		Data data;
-
-	};
-
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.cpp
deleted file mode 100644
index a27f74c0d5..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.cpp
+++ /dev/null
@@ -1,1281 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_ETC1.cpp
-
-Block4x4Encoding_ETC1 is the encoder to use when targetting file format ETC1.  This encoder is also
-used for the ETC1 subset of file format RGB8, RGBA8 and RGB8A1
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_ETC1.h"
-
-#include "EtcBlock4x4.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcDifferentialTrys.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-
-	// pixel processing order if the flip bit = 0 (horizontal split)
-	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip0[PIXELS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-
-	// pixel processing order if the flip bit = 1 (vertical split)
-	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip1[PIXELS] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
-
-	// pixel processing order for horizontal scan (ETC normally does a vertical scan)
-	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
-
-	// pixel indices for different block halves
-	const unsigned int Block4x4Encoding_ETC1::s_auiLeftPixelMapping[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-	const unsigned int Block4x4Encoding_ETC1::s_auiRightPixelMapping[8] = { 8, 9, 10, 11, 12, 13, 14, 15 };
-	const unsigned int Block4x4Encoding_ETC1::s_auiTopPixelMapping[8] = { 0, 1, 4, 5, 8, 9, 12, 13 };
-	const unsigned int Block4x4Encoding_ETC1::s_auiBottomPixelMapping[8] = { 2, 3, 6, 7, 10, 11, 14, 15 };
-
-	// CW ranges that the ETC1 decoders use
-	// CW is basically a contrast for the different selector bits, since these values are offsets to the base color
-	// the first axis in the array is indexed by the CW in the encoding bits
-	// the second axis in the array is indexed by the selector bits
-	float Block4x4Encoding_ETC1::s_aafCwTable[CW_RANGES][SELECTORS] =
-	{
-		{ 2.0f / 255.0f, 8.0f / 255.0f, -2.0f / 255.0f, -8.0f / 255.0f },
-		{ 5.0f / 255.0f, 17.0f / 255.0f, -5.0f / 255.0f, -17.0f / 255.0f },
-		{ 9.0f / 255.0f, 29.0f / 255.0f, -9.0f / 255.0f, -29.0f / 255.0f },
-		{ 13.0f / 255.0f, 42.0f / 255.0f, -13.0f / 255.0f, -42.0f / 255.0f },
-		{ 18.0f / 255.0f, 60.0f / 255.0f, -18.0f / 255.0f, -60.0f / 255.0f },
-		{ 24.0f / 255.0f, 80.0f / 255.0f, -24.0f / 255.0f, -80.0f / 255.0f },
-		{ 33.0f / 255.0f, 106.0f / 255.0f, -33.0f / 255.0f, -106.0f / 255.0f },
-		{ 47.0f / 255.0f, 183.0f / 255.0f, -47.0f / 255.0f, -183.0f / 255.0f }
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_ETC1::Block4x4Encoding_ETC1(void)
-	{
-		m_mode = MODE_ETC1;
-		m_boolDiff = false;
-		m_boolFlip = false;
-		m_frgbaColor1 = ColorFloatRGBA();
-		m_frgbaColor2 = ColorFloatRGBA();
-		m_uiCW1 = 0;
-		m_uiCW2 = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_auiSelectors[uiPixel] = 0;
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-		}
-
-		m_boolMostLikelyFlip = false;
-
-		m_fError = -1.0f;
-
-		m_fError1 = -1.0f;
-		m_fError2 = -1.0f;
-		m_boolSeverelyBentDifferentialColors = false;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-		}
-
-	}
-
-	 Block4x4Encoding_ETC1::~Block4x4Encoding_ETC1(void) {}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_ETC1::InitFromSource(Block4x4 *a_pblockParent,
-												ColorFloatRGBA *a_pafrgbaSource,
-												unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
-	{
-
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-		}
-
-		m_fError = -1.0f;
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_ETC1::InitFromEncodingBits(Block4x4 *a_pblockParent,
-														unsigned char *a_paucEncodingBits,
-														ColorFloatRGBA *a_pafrgbaSource, 
-														ErrorMetric a_errormetric)
-	{
-
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-		m_fError = -1.0f;
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
-
-		m_mode = MODE_ETC1;
-		m_boolDiff = m_pencodingbitsRGB8->individual.diff;
-		m_boolFlip = m_pencodingbitsRGB8->individual.flip;
-		if (m_boolDiff)
-		{
-			int iR2 = (int)(m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2);
-			if (iR2 < 0)
-			{
-				iR2 = 0;
-			}
-			else if (iR2 > 31)
-			{
-				iR2 = 31;
-			}
-
-			int iG2 = (int)(m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2);
-			if (iG2 < 0)
-			{
-				iG2 = 0;
-			}
-			else if (iG2 > 31)
-			{
-				iG2 = 31;
-			}
-
-			int iB2 = (int)(m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2);
-			if (iB2 < 0)
-			{
-				iB2 = 0;
-			}
-			else if (iB2 > 31)
-			{
-				iB2 = 31;
-			}
-
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, (unsigned char)iB2);
-
-		}
-		else
-		{
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red1, m_pencodingbitsRGB8->individual.green1, m_pencodingbitsRGB8->individual.blue1);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red2, m_pencodingbitsRGB8->individual.green2, m_pencodingbitsRGB8->individual.blue2);
-		}
-
-		m_uiCW1 = m_pencodingbitsRGB8->individual.cw1;
-		m_uiCW2 = m_pencodingbitsRGB8->individual.cw2;
-
-		InitFromEncodingBits_Selectors();
-
-		Decode();
-
-		CalcBlockError();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// init the selectors from a prior encoding
-	//
-	void Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(void)
-	{
-
-		unsigned char *paucSelectors = (unsigned char *)&m_pencodingbitsRGB8->individual.selectors;
-
-		for (unsigned int iPixel = 0; iPixel < PIXELS; iPixel++)
-		{
-			unsigned int uiByteMSB = (unsigned int)(1 - (iPixel / 8));
-			unsigned int uiByteLSB = (unsigned int)(3 - (iPixel / 8));
-			unsigned int uiShift = (unsigned int)(iPixel & 7);
-
-			unsigned int uiSelectorMSB = (unsigned int)((paucSelectors[uiByteMSB] >> uiShift) & 1);
-			unsigned int uiSelectorLSB = (unsigned int)((paucSelectors[uiByteLSB] >> uiShift) & 1);
-
-			m_auiSelectors[iPixel] = (uiSelectorMSB << 1) + uiSelectorLSB;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_ETC1::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			PerformFirstIteration();
-			break;
-
-		case 1:
-			TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 2:
-			TryIndividual(m_boolMostLikelyFlip, 1);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 3:
-			TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
-			if (a_fEffort <= 59.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 4:
-			TryIndividual(!m_boolMostLikelyFlip, 1);
-			if (a_fEffort <= 69.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 5:
-			TryDegenerates1();
-			if (a_fEffort <= 79.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 6:
-			TryDegenerates2();
-			if (a_fEffort <= 89.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 7:
-			TryDegenerates3();
-			if (a_fEffort <= 99.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 8:
-			TryDegenerates4();
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best initial encoding to ensure block has a valid encoding
-	//
-	void Block4x4Encoding_ETC1::PerformFirstIteration(void)
-	{
-		CalculateMostLikelyFlip();
-
-		m_fError = FLT_MAX;
-
-		TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-
-		TryIndividual(m_boolMostLikelyFlip, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		TryIndividual(!m_boolMostLikelyFlip, 0);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// algorithm:
-	// create a source average color for the Left, Right, Top and Bottom halves using the 8 pixels in each half
-	// note: the "gray line" is the line of equal delta RGB that goes thru the average color
-	// for each half:
-	//		see how close each of the 8 pixels are to the "gray line" that goes thru the source average color
-	//		create an error value that is the sum of the distances from the gray line
-	// h_error is the sum of Left and Right errors
-	// v_error is the sum of Top and Bottom errors
-	//
-	void Block4x4Encoding_ETC1::CalculateMostLikelyFlip(void)
-	{
-		static const bool DEBUG_PRINT = false;
-
-		CalculateSourceAverages();
-
-		float fLeftGrayErrorSum = 0.0f;
-		float fRightGrayErrorSum = 0.0f;
-		float fTopGrayErrorSum = 0.0f;
-		float fBottomGrayErrorSum = 0.0f;
-
-		for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-		{
-			ColorFloatRGBA *pfrgbaLeft = &m_pafrgbaSource[uiPixel];
-			ColorFloatRGBA *pfrgbaRight = &m_pafrgbaSource[uiPixel + 8];
-			ColorFloatRGBA *pfrgbaTop = &m_pafrgbaSource[s_auiTopPixelMapping[uiPixel]];
-			ColorFloatRGBA *pfrgbaBottom = &m_pafrgbaSource[s_auiBottomPixelMapping[uiPixel]];
-
-			float fLeftGrayError = CalcGrayDistance2(*pfrgbaLeft, m_frgbaSourceAverageLeft);
-			float fRightGrayError = CalcGrayDistance2(*pfrgbaRight, m_frgbaSourceAverageRight);
-			float fTopGrayError = CalcGrayDistance2(*pfrgbaTop, m_frgbaSourceAverageTop);
-			float fBottomGrayError = CalcGrayDistance2(*pfrgbaBottom, m_frgbaSourceAverageBottom);
-
-			fLeftGrayErrorSum += fLeftGrayError;
-			fRightGrayErrorSum += fRightGrayError;
-			fTopGrayErrorSum += fTopGrayError;
-			fBottomGrayErrorSum += fBottomGrayError;
-		}
-
-		if (DEBUG_PRINT)
-		{
-			printf("\n%.2f %.2f\n", fLeftGrayErrorSum + fRightGrayErrorSum, fTopGrayErrorSum + fBottomGrayErrorSum);
-		}
-
-		m_boolMostLikelyFlip = (fTopGrayErrorSum + fBottomGrayErrorSum) < (fLeftGrayErrorSum + fRightGrayErrorSum);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate source pixel averages for each 2x2 quadrant in a 4x4 block
-	// these are used to determine the averages for each of the 4 different halves (left, right, top, bottom)
-	// ignore pixels that have alpha == NAN (these are border pixels outside of the source image)
-	// weight the averages based on a pixel's alpha
-	//
-	void Block4x4Encoding_ETC1::CalculateSourceAverages(void)
-	{
-		static const bool DEBUG_PRINT = false;
-
-		bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == ErrorMetric::RGBX;
-
-		if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE || boolRGBX)
-		{
-			ColorFloatRGBA frgbaSumUL = m_pafrgbaSource[0] + m_pafrgbaSource[1] + m_pafrgbaSource[4] + m_pafrgbaSource[5];
-			ColorFloatRGBA frgbaSumLL = m_pafrgbaSource[2] + m_pafrgbaSource[3] + m_pafrgbaSource[6] + m_pafrgbaSource[7];
-			ColorFloatRGBA frgbaSumUR = m_pafrgbaSource[8] + m_pafrgbaSource[9] + m_pafrgbaSource[12] + m_pafrgbaSource[13];
-			ColorFloatRGBA frgbaSumLR = m_pafrgbaSource[10] + m_pafrgbaSource[11] + m_pafrgbaSource[14] + m_pafrgbaSource[15];
-
-			m_frgbaSourceAverageLeft = (frgbaSumUL + frgbaSumLL) * 0.125f;
-			m_frgbaSourceAverageRight = (frgbaSumUR + frgbaSumLR) * 0.125f;
-			m_frgbaSourceAverageTop = (frgbaSumUL + frgbaSumUR) * 0.125f;
-			m_frgbaSourceAverageBottom = (frgbaSumLL + frgbaSumLR) * 0.125f;
-		}
-		else
-		{
-			float afSourceAlpha[PIXELS];
-
-			// treat alpha NAN as 0.0f
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				afSourceAlpha[uiPixel] = isnan(m_pafrgbaSource[uiPixel].fA) ? 
-																		0.0f : 
-																		m_pafrgbaSource[uiPixel].fA;
-			}
-
-			ColorFloatRGBA afrgbaAlphaWeightedSource[PIXELS];
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				afrgbaAlphaWeightedSource[uiPixel] = m_pafrgbaSource[uiPixel] * afSourceAlpha[uiPixel];
-			}
-
-			ColorFloatRGBA frgbaSumUL = afrgbaAlphaWeightedSource[0] +
-										afrgbaAlphaWeightedSource[1] +
-										afrgbaAlphaWeightedSource[4] +
-										afrgbaAlphaWeightedSource[5];
-
-			ColorFloatRGBA frgbaSumLL = afrgbaAlphaWeightedSource[2] +
-										afrgbaAlphaWeightedSource[3] +
-										afrgbaAlphaWeightedSource[6] +
-										afrgbaAlphaWeightedSource[7];
-
-			ColorFloatRGBA frgbaSumUR = afrgbaAlphaWeightedSource[8] +
-										afrgbaAlphaWeightedSource[9] +
-										afrgbaAlphaWeightedSource[12] +
-										afrgbaAlphaWeightedSource[13];
-
-			ColorFloatRGBA frgbaSumLR = afrgbaAlphaWeightedSource[10] +
-										afrgbaAlphaWeightedSource[11] +
-										afrgbaAlphaWeightedSource[14] +
-										afrgbaAlphaWeightedSource[15];
-
-			float fWeightSumUL = afSourceAlpha[0] +
-									afSourceAlpha[1] +
-									afSourceAlpha[4] +
-									afSourceAlpha[5];
-
-			float fWeightSumLL = afSourceAlpha[2] +
-									afSourceAlpha[3] +
-									afSourceAlpha[6] +
-									afSourceAlpha[7];
-
-			float fWeightSumUR = afSourceAlpha[8] +
-									afSourceAlpha[9] +
-									afSourceAlpha[12] +
-									afSourceAlpha[13];
-
-			float fWeightSumLR = afSourceAlpha[10] +
-									afSourceAlpha[11] +
-									afSourceAlpha[14] +
-									afSourceAlpha[15];
-
-			ColorFloatRGBA frgbaSumLeft = frgbaSumUL + frgbaSumLL;
-			ColorFloatRGBA frgbaSumRight = frgbaSumUR + frgbaSumLR;
-			ColorFloatRGBA frgbaSumTop = frgbaSumUL + frgbaSumUR;
-			ColorFloatRGBA frgbaSumBottom = frgbaSumLL + frgbaSumLR;
-
-			float fWeightSumLeft = fWeightSumUL + fWeightSumLL;
-			float fWeightSumRight = fWeightSumUR + fWeightSumLR;
-			float fWeightSumTop = fWeightSumUL + fWeightSumUR;
-			float fWeightSumBottom = fWeightSumLL + fWeightSumLR;
-
-			// check to see if there is at least 1 pixel with  non-zero alpha
-			// completely transparent block should not make it to this code
-			assert((fWeightSumLeft + fWeightSumRight) > 0.0f);
-			assert((fWeightSumTop + fWeightSumBottom) > 0.0f);
-
-			if (fWeightSumLeft > 0.0f)
-			{
-				m_frgbaSourceAverageLeft = frgbaSumLeft * (1.0f/fWeightSumLeft);
-			}
-			if (fWeightSumRight > 0.0f)
-			{
-				m_frgbaSourceAverageRight = frgbaSumRight * (1.0f/fWeightSumRight);
-			}
-			if (fWeightSumTop > 0.0f)
-			{
-				m_frgbaSourceAverageTop = frgbaSumTop * (1.0f/fWeightSumTop);
-			}
-			if (fWeightSumBottom > 0.0f)
-			{
-				m_frgbaSourceAverageBottom = frgbaSumBottom * (1.0f/fWeightSumBottom);
-			}
-
-			if (fWeightSumLeft == 0.0f)
-			{
-				assert(fWeightSumRight > 0.0f);
-				m_frgbaSourceAverageLeft = m_frgbaSourceAverageRight;
-			}
-			if (fWeightSumRight == 0.0f)
-			{
-				assert(fWeightSumLeft > 0.0f);
-				m_frgbaSourceAverageRight = m_frgbaSourceAverageLeft;
-			}
-			if (fWeightSumTop == 0.0f)
-			{
-				assert(fWeightSumBottom > 0.0f);
-				m_frgbaSourceAverageTop = m_frgbaSourceAverageBottom;
-			}
-			if (fWeightSumBottom == 0.0f)
-			{
-				assert(fWeightSumTop > 0.0f);
-				m_frgbaSourceAverageBottom = m_frgbaSourceAverageTop;
-			}
-		}
-
-		
-
-		if (DEBUG_PRINT)
-		{
-			printf("\ntarget: [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f]\n",
-				m_frgbaSourceAverageLeft.fR, m_frgbaSourceAverageLeft.fG, m_frgbaSourceAverageLeft.fB,
-				m_frgbaSourceAverageRight.fR, m_frgbaSourceAverageRight.fG, m_frgbaSourceAverageRight.fB,
-				m_frgbaSourceAverageTop.fR, m_frgbaSourceAverageTop.fG, m_frgbaSourceAverageTop.fB,
-				m_frgbaSourceAverageBottom.fR, m_frgbaSourceAverageBottom.fG, m_frgbaSourceAverageBottom.fB);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try an ETC1 differential mode encoding
-	// use a_boolFlip to set the encoding F bit
-	// use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius]
-	// use a_iGrayOffset1 and a_iGrayOffset2 to offset the basecolor to search for degenerate encodings
-	// replace the encoding if the encoding error is less than previous encoding
-	//
-	void Block4x4Encoding_ETC1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
-												int a_iGrayOffset1, int a_iGrayOffset2)
-	{
-
-		ColorFloatRGBA frgbaColor1;
-		ColorFloatRGBA frgbaColor2;
-
-		const unsigned int *pauiPixelMapping1;
-		const unsigned int *pauiPixelMapping2;
-
-		if (a_boolFlip)
-		{
-			frgbaColor1 = m_frgbaSourceAverageTop;
-			frgbaColor2 = m_frgbaSourceAverageBottom;
-
-			pauiPixelMapping1 = s_auiTopPixelMapping;
-			pauiPixelMapping2 = s_auiBottomPixelMapping;
-		}
-		else
-		{
-			frgbaColor1 = m_frgbaSourceAverageLeft;
-			frgbaColor2 = m_frgbaSourceAverageRight;
-
-			pauiPixelMapping1 = s_auiLeftPixelMapping;
-			pauiPixelMapping2 = s_auiRightPixelMapping;
-		}
-
-		DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, 
-								a_uiRadius, a_iGrayOffset1, a_iGrayOffset2);
-
-		Block4x4Encoding_ETC1 encodingTry = *this;
-		encodingTry.m_boolFlip = a_boolFlip;
-
-		encodingTry.TryDifferentialHalf(&trys.m_half1);
-		encodingTry.TryDifferentialHalf(&trys.m_half2);
-
-		// find best halves that are within differential range
-		DifferentialTrys::Try *ptryBest1 = nullptr;
-		DifferentialTrys::Try *ptryBest2 = nullptr;
-		encodingTry.m_fError = FLT_MAX;
-
-		// see if the best of each half are in differential range
-		int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed;
-		int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen;
-		int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue;
-		if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3)
-		{
-			ptryBest1 = trys.m_half1.m_ptryBest;
-			ptryBest2 = trys.m_half2.m_ptryBest;
-			encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
-		}
-		else
-		{
-			// else, find the next best halves that are in differential range
-			for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0];
-			ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys];
-				ptry1++)
-			{
-				for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0];
-				ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys];
-					ptry2++)
-				{
-					iDRed = ptry2->m_iRed - ptry1->m_iRed;
-					bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4;
-					iDGreen = ptry2->m_iGreen - ptry1->m_iGreen;
-					bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4;
-					iDBlue = ptry2->m_iBlue - ptry1->m_iBlue;
-					bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4;
-
-					if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta)
-					{
-						float fError = ptry1->m_fError + ptry2->m_fError;
-
-						if (fError < encodingTry.m_fError)
-						{
-							encodingTry.m_fError = fError;
-
-							ptryBest1 = ptry1;
-							ptryBest2 = ptry2;
-						}
-					}
-
-				}
-			}
-			assert(encodingTry.m_fError < FLT_MAX);
-			assert(ptryBest1 != nullptr);
-			assert(ptryBest2 != nullptr);
-		}
-
-		if (encodingTry.m_fError < m_fError)
-		{
-			m_mode = MODE_ETC1;
-			m_boolDiff = true;
-			m_boolFlip = encodingTry.m_boolFlip;
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
-			m_uiCW1 = ptryBest1->m_uiCW;
-			m_uiCW2 = ptryBest2->m_uiCW;
-
-			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)
-			{
-				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
-				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
-
-				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
-				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				m_auiSelectors[uiPixel1] = uiSelector1;
-				m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1];
-				float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2];
-
-				m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
-				m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
-			}
-
-			m_fError1 = ptryBest1->m_fError;
-			m_fError2 = ptryBest2->m_fError;
-			m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors;
-			m_fError = m_fError1 + m_fError2;
-
-			// sanity check
-			{
-				int iRed1 = m_frgbaColor1.IntRed(31.0f);
-				int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
-				int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
-
-				int iRed2 = m_frgbaColor2.IntRed(31.0f);
-				int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
-				int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
-
-				iDRed = iRed2 - iRed1;
-				iDGreen = iGreen2 - iGreen1;
-				iDBlue = iBlue2 - iBlue1;
-
-				assert(iDRed >= -4 && iDRed < 4);
-				assert(iDGreen >= -4 && iDGreen < 4);
-				assert(iDBlue >= -4 && iDBlue < 4);
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try an ETC1 differential mode encoding for a half of a 4x4 block
-	// vary the basecolor components using a radius
-	//
-	void Block4x4Encoding_ETC1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf)
-	{
-
-		a_phalf->m_ptryBest = nullptr;
-		float fBestTryError = FLT_MAX;
-
-		a_phalf->m_uiTrys = 0;
-		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius; 
-				iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
-				iRed++)
-		{
-			assert(iRed >= 0 && iRed <= 31);
-
-			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
-					iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
-					iGreen++)
-			{
-				assert(iGreen >= 0 && iGreen <= 31);
-
-				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
-						iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
-						iBlue++)
-				{
-					assert(iBlue >= 0 && iBlue <= 31);
-
-					DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
-					assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]);
-
-					ptry->m_iRed = iRed;
-					ptry->m_iGreen = iGreen;
-					ptry->m_iBlue = iBlue;
-					ptry->m_fError = FLT_MAX;
-					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
-
-					// try each CW
-					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
-					{
-						unsigned int auiPixelSelectors[PIXELS / 2];
-						ColorFloatRGBA	afrgbaDecodedPixels[PIXELS / 2];
-						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, 
-															FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-
-						// pre-compute decoded pixels for each selector
-						ColorFloatRGBA afrgbaSelectors[SELECTORS];
-						assert(SELECTORS == 4);
-						afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB();
-						afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB();
-						afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB();
-						afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB();
-
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
-							ColorFloatRGBA frgbaDecodedPixel;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
-
-								float fPixelError;
-
-								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
-																	*pfrgbaSourcePixel);
-
-								if (fPixelError < afPixelErrors[uiPixel])
-								{
-									auiPixelSelectors[uiPixel] = uiSelector;
-									afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel;
-									afPixelErrors[uiPixel] = fPixelError;
-								}
-
-							}
-						}
-
-						// add up all pixel errors
-						float fCWError = 0.0f;
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{	
-							fCWError += afPixelErrors[uiPixel];
-						}
-
-						// if best CW so far
-						if (fCWError < ptry->m_fError)
-						{
-							ptry->m_uiCW = uiCW;
-							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-							{
-								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
-							}
-							ptry->m_fError = fCWError;
-						}
-
-					}
-
-					if (ptry->m_fError < fBestTryError)
-					{
-						a_phalf->m_ptryBest = ptry;
-						fBestTryError = ptry->m_fError;
-					}
-
-					assert(ptry->m_fError < FLT_MAX);
-
-					a_phalf->m_uiTrys++;
-				}
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try an ETC1 individual mode encoding
-	// use a_boolFlip to set the encoding F bit
-	// use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius]
-	// replace the encoding if the encoding error is less than previous encoding
-	//
-	void Block4x4Encoding_ETC1::TryIndividual(bool a_boolFlip, unsigned int a_uiRadius)
-	{
-
-		ColorFloatRGBA frgbaColor1;
-		ColorFloatRGBA frgbaColor2;
-
-		const unsigned int *pauiPixelMapping1;
-		const unsigned int *pauiPixelMapping2;
-
-		if (a_boolFlip)
-		{
-			frgbaColor1 = m_frgbaSourceAverageTop;
-			frgbaColor2 = m_frgbaSourceAverageBottom;
-
-			pauiPixelMapping1 = s_auiTopPixelMapping;
-			pauiPixelMapping2 = s_auiBottomPixelMapping;
-		}
-		else
-		{
-			frgbaColor1 = m_frgbaSourceAverageLeft;
-			frgbaColor2 = m_frgbaSourceAverageRight;
-
-			pauiPixelMapping1 = s_auiLeftPixelMapping;
-			pauiPixelMapping2 = s_auiRightPixelMapping;
-		}
-
-		IndividualTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, a_uiRadius);
-
-		Block4x4Encoding_ETC1 encodingTry = *this;
-		encodingTry.m_boolFlip = a_boolFlip;
-
-		encodingTry.TryIndividualHalf(&trys.m_half1);
-		encodingTry.TryIndividualHalf(&trys.m_half2);
-
-		// use the best of each half
-		IndividualTrys::Try *ptryBest1 = trys.m_half1.m_ptryBest;
-		IndividualTrys::Try *ptryBest2 = trys.m_half2.m_ptryBest;
-		encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
-
-		if (encodingTry.m_fError < m_fError)
-		{
-			m_mode = MODE_ETC1;
-			m_boolDiff = false;
-			m_boolFlip = encodingTry.m_boolFlip;
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
-			m_uiCW1 = ptryBest1->m_uiCW;
-			m_uiCW2 = ptryBest2->m_uiCW;
-
-			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)
-			{
-				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
-				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
-
-				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
-				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				m_auiSelectors[uiPixel1] = uiSelector1;
-				m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1];
-				float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2];
-
-				m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
-				m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
-			}
-
-			m_fError1 = ptryBest1->m_fError;
-			m_fError2 = ptryBest2->m_fError;
-			m_fError = m_fError1 + m_fError2;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try an ETC1 differential mode encoding for a half of a 4x4 block
-	// vary the basecolor components using a radius
-	//
-	void Block4x4Encoding_ETC1::TryIndividualHalf(IndividualTrys::Half *a_phalf)
-	{
-
-		a_phalf->m_ptryBest = nullptr;
-		float fBestTryError = FLT_MAX;
-
-		a_phalf->m_uiTrys = 0;
-		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius;
-			iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
-			iRed++)
-		{
-			assert(iRed >= 0 && iRed <= 15);
-
-			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
-				iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
-				iGreen++)
-			{
-				assert(iGreen >= 0 && iGreen <= 15);
-
-				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
-					iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
-					iBlue++)
-				{
-					assert(iBlue >= 0 && iBlue <= 15);
-
-					IndividualTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
-					assert(ptry < &a_phalf->m_atry[IndividualTrys::Half::MAX_TRYS]);
-
-					ptry->m_iRed = iRed;
-					ptry->m_iGreen = iGreen;
-					ptry->m_iBlue = iBlue;
-					ptry->m_fError = FLT_MAX;
-					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
-
-					// try each CW
-					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
-					{
-						unsigned int auiPixelSelectors[PIXELS / 2];
-						ColorFloatRGBA	afrgbaDecodedPixels[PIXELS / 2];
-						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-															FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-
-						// pre-compute decoded pixels for each selector
-						ColorFloatRGBA afrgbaSelectors[SELECTORS];
-						assert(SELECTORS == 4);
-						afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB();
-						afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB();
-						afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB();
-						afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB();
-
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
-							ColorFloatRGBA frgbaDecodedPixel;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
-
-								float fPixelError;
-
-								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
-										*pfrgbaSourcePixel);
-
-								if (fPixelError < afPixelErrors[uiPixel])
-								{
-									auiPixelSelectors[uiPixel] = uiSelector;
-									afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel;
-									afPixelErrors[uiPixel] = fPixelError;
-								}
-
-							}
-						}
-
-						// add up all pixel errors
-						float fCWError = 0.0f;
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							fCWError += afPixelErrors[uiPixel];
-						}
-
-						// if best CW so far
-						if (fCWError < ptry->m_fError)
-						{
-							ptry->m_uiCW = uiCW;
-							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-							{
-								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
-							}
-							ptry->m_fError = fCWError;
-						}
-
-					}
-
-					if (ptry->m_fError < fBestTryError)
-					{
-						a_phalf->m_ptryBest = ptry;
-						fBestTryError = ptry->m_fError;
-					}
-
-					assert(ptry->m_fError < FLT_MAX);
-
-					a_phalf->m_uiTrys++;
-				}
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 1 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_ETC1::TryDegenerates1(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, 2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, -2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 2 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_ETC1::TryDegenerates2(void)
-	{
-
-		TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 3 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_ETC1::TryDegenerates3(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, -2);
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, 2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, -2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, 2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 4 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_ETC1::TryDegenerates4(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -4, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 4, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, 4);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, -4);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find the best selector for each pixel based on a particular basecolor and CW that have been previously set
-	// calculate the selectors for each half of the block separately
-	// set the block error as the sum of each half's error
-	//
-	void Block4x4Encoding_ETC1::CalculateSelectors()
-	{
-		if (m_boolFlip)
-		{
-			CalculateHalfOfTheSelectors(0, s_auiTopPixelMapping);
-			CalculateHalfOfTheSelectors(1, s_auiBottomPixelMapping);
-		}
-		else
-		{
-			CalculateHalfOfTheSelectors(0, s_auiLeftPixelMapping);
-			CalculateHalfOfTheSelectors(1, s_auiRightPixelMapping);
-		}
-
-		m_fError = m_fError1 + m_fError2;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// choose best selectors for half of the block
-	// calculate the error for half of the block
-	//
-	void Block4x4Encoding_ETC1::CalculateHalfOfTheSelectors(unsigned int a_uiHalf,
-		const unsigned int *pauiPixelMapping)
-	{
-		static const bool DEBUG_PRINT = false;
-
-		ColorFloatRGBA *pfrgbaColor = a_uiHalf ? &m_frgbaColor2 : &m_frgbaColor1;
-		unsigned int *puiCW = a_uiHalf ? &m_uiCW2 : &m_uiCW1;
-
-		float *pfHalfError = a_uiHalf ? &m_fError2 : &m_fError1;
-		*pfHalfError = FLT_MAX;
-
-		// try each CW
-		for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
-		{
-			if (DEBUG_PRINT)
-			{
-				printf("\ncw=%u\n", uiCW);
-			}
-
-			unsigned int auiPixelSelectors[PIXELS / 2];
-			ColorFloatRGBA	afrgbaDecodedPixels[PIXELS / 2];
-			float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-
-			for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-			{
-				if (DEBUG_PRINT)
-				{
-					printf("\tsource [%.2f,%.2f,%.2f]\n", m_pafrgbaSource[pauiPixelMapping[uiPixel]].fR,
-						m_pafrgbaSource[pauiPixelMapping[uiPixel]].fG, m_pafrgbaSource[pauiPixelMapping[uiPixel]].fB);
-				}
-
-				ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[pauiPixelMapping[uiPixel]];
-				ColorFloatRGBA frgbaDecodedPixel;
-
-				for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-				{
-					float fDeltaRGB = s_aafCwTable[uiCW][uiSelector];
-
-					frgbaDecodedPixel = (*pfrgbaColor + fDeltaRGB).ClampRGB();
-
-					float fPixelError;
-					
-					fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[pauiPixelMapping[uiPixel]],
-														*pfrgbaSourcePixel);
-					
-					if (DEBUG_PRINT)
-					{
-						printf("\tpixel %u, index %u [%.2f,%.2f,%.2f], error %.2f", uiPixel, uiSelector,
-							frgbaDecodedPixel.fR,
-							frgbaDecodedPixel.fG,
-							frgbaDecodedPixel.fB,
-							fPixelError);
-					}
-
-					if (fPixelError < afPixelErrors[uiPixel])
-					{
-						if (DEBUG_PRINT)
-						{
-							printf(" *");
-						}
-
-						auiPixelSelectors[uiPixel] = uiSelector;
-						afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel;
-						afPixelErrors[uiPixel] = fPixelError;
-					}
-
-					if (DEBUG_PRINT)
-					{
-						printf("\n");
-					}
-				}
-			}
-
-			// add up all pixel errors
-			float fCWError = 0.0f;
-			for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-			{
-				fCWError += afPixelErrors[uiPixel];
-			}
-			if (DEBUG_PRINT)
-			{
-				printf("\terror %.2f\n", fCWError);
-			}
-
-			// if best CW so far
-			if (fCWError < *pfHalfError)
-			{
-				*pfHalfError = fCWError;
-				*puiCW = uiCW;
-				for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-				{
-					m_auiSelectors[pauiPixelMapping[uiPixel]] = auiPixelSelectors[uiPixel];
-					m_afrgbaDecodedColors[pauiPixelMapping[uiPixel]] = afrgbaDecodedPixels[uiPixel];
-				}
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_ETC1::SetEncodingBits(void)
-	{
-		assert(m_mode == MODE_ETC1);
-
-		if (m_boolDiff)
-		{
-			int iRed1 = m_frgbaColor1.IntRed(31.0f);
-			int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
-			int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
-
-			int iRed2 = m_frgbaColor2.IntRed(31.0f);
-			int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
-			int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
-
-			int iDRed2 = iRed2 - iRed1;
-			int iDGreen2 = iGreen2 - iGreen1;
-			int iDBlue2 = iBlue2 - iBlue1;
-
-			assert(iDRed2 >= -4 && iDRed2 < 4);
-			assert(iDGreen2 >= -4 && iDGreen2 < 4);
-			assert(iDBlue2 >= -4 && iDBlue2 < 4);
-
-			m_pencodingbitsRGB8->differential.red1 = (unsigned int)iRed1;
-			m_pencodingbitsRGB8->differential.green1 = (unsigned int)iGreen1;
-			m_pencodingbitsRGB8->differential.blue1 = (unsigned int)iBlue1;
-
-			m_pencodingbitsRGB8->differential.dred2 = iDRed2;
-			m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2;
-			m_pencodingbitsRGB8->differential.dblue2 = iDBlue2;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->individual.red1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-			m_pencodingbitsRGB8->individual.green1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-			m_pencodingbitsRGB8->individual.blue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-			m_pencodingbitsRGB8->individual.red2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-			m_pencodingbitsRGB8->individual.green2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-			m_pencodingbitsRGB8->individual.blue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-		}
-
-		m_pencodingbitsRGB8->individual.cw1 = m_uiCW1;
-		m_pencodingbitsRGB8->individual.cw2 = m_uiCW2;
-
-		SetEncodingBits_Selectors();
-
-		m_pencodingbitsRGB8->individual.diff = (unsigned int)m_boolDiff;
-		m_pencodingbitsRGB8->individual.flip = (unsigned int)m_boolFlip;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the selectors in the encoding bits
-	//
-	void Block4x4Encoding_ETC1::SetEncodingBits_Selectors(void)
-	{
-
-		m_pencodingbitsRGB8->individual.selectors = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiSelector = m_auiSelectors[uiPixel];
-
-			// set index msb
-			m_pencodingbitsRGB8->individual.selectors |= (uiSelector >> 1) << (uiPixel ^ 8);
-
-			// set index lsb
-			m_pencodingbitsRGB8->individual.selectors |= (uiSelector & 1) << ((16 + uiPixel) ^ 8);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the decoded colors and decoded alpha based on the encoding state
-	//
-	void Block4x4Encoding_ETC1::Decode(void)
-	{
-
-		const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0;
-
-		for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++)
-		{
-			ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2;
-			unsigned int uiCW = uiPixelOrder < 8 ? m_uiCW1 : m_uiCW2;
-
-			unsigned int uiPixel = pauiPixelOrder[uiPixelOrder];
-
-			float fDelta = s_aafCwTable[uiCW][m_auiSelectors[uiPixel]];
-			m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB();
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.h
deleted file mode 100644
index c0dc84d5d5..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcDifferentialTrys.h"
-#include "EtcIndividualTrys.h"
-
-namespace Etc
-{
-
-	// base class for Block4x4Encoding_RGB8
-	class Block4x4Encoding_ETC1 : public Block4x4Encoding
-	{
-	public:
-
-		Block4x4Encoding_ETC1(void);
-		virtual ~Block4x4Encoding_ETC1(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-									ColorFloatRGBA *a_pafrgbaSource,
-
-									unsigned char *a_paucEncodingBits,
-									ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource, 
-
-											ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		inline virtual bool GetFlip(void)
-		{
-			return m_boolFlip;
-		}
-
-		inline virtual bool IsDifferential(void)
-		{
-			return m_boolDiff;
-		}
-
-		virtual void SetEncodingBits(void);
-
-		void Decode(void);
-
-		inline ColorFloatRGBA GetColor1(void) const
-		{
-			return m_frgbaColor1;
-		}
-
-		inline ColorFloatRGBA GetColor2(void) const
-		{
-			return m_frgbaColor2;
-		}
-
-		inline const unsigned int * GetSelectors(void) const
-		{
-			return m_auiSelectors;
-		}
-
-		inline unsigned int GetCW1(void) const
-		{
-			return m_uiCW1;
-		}
-
-		inline unsigned int GetCW2(void) const
-		{
-			return m_uiCW2;
-		}
-
-		inline bool HasSeverelyBentDifferentialColors(void) const
-		{
-			return m_boolSeverelyBentDifferentialColors;
-		}
-
-	protected:
-
-		static const unsigned int s_auiPixelOrderFlip0[PIXELS];
-		static const unsigned int s_auiPixelOrderFlip1[PIXELS];
-		static const unsigned int s_auiPixelOrderHScan[PIXELS];
-
-		static const unsigned int s_auiLeftPixelMapping[8];
-		static const unsigned int s_auiRightPixelMapping[8];
-		static const unsigned int s_auiTopPixelMapping[8];
-		static const unsigned int s_auiBottomPixelMapping[8];
-
-		static const unsigned int SELECTOR_BITS = 2;
-		static const unsigned int SELECTORS = 1 << SELECTOR_BITS;
-
-		static const unsigned int CW_BITS = 3;
-		static const unsigned int CW_RANGES = 1 << CW_BITS;
-
-		static float s_aafCwTable[CW_RANGES][SELECTORS];
-		static unsigned char s_aucDifferentialCwRange[256];
-
-		static const int MAX_DIFFERENTIAL = 3;
-		static const int MIN_DIFFERENTIAL = -4;
-
-		void InitFromEncodingBits_Selectors(void);
-
-		void PerformFirstIteration(void);
-		void CalculateMostLikelyFlip(void);
-
-		void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
-								int a_iGrayOffset1, int a_iGrayOffset2);
-		void TryDifferentialHalf(DifferentialTrys::Half *a_phalf);
-
-		void TryIndividual(bool a_boolFlip, unsigned int a_uiRadius);
-		void TryIndividualHalf(IndividualTrys::Half *a_phalf);
-
-		void TryDegenerates1(void);
-		void TryDegenerates2(void);
-		void TryDegenerates3(void);
-		void TryDegenerates4(void);
-
-		void CalculateSelectors();
-		void CalculateHalfOfTheSelectors(unsigned int a_uiHalf,
-											const unsigned int *pauiPixelMapping);
-
-		// calculate the distance2 of r_frgbaPixel from r_frgbaTarget's gray line
-		inline float CalcGrayDistance2(ColorFloatRGBA &r_frgbaPixel, 
-										ColorFloatRGBA &r_frgbaTarget)
-		{
-			float fDeltaGray = ((r_frgbaPixel.fR - r_frgbaTarget.fR) +
-								(r_frgbaPixel.fG - r_frgbaTarget.fG) +
-								(r_frgbaPixel.fB - r_frgbaTarget.fB)) / 3.0f;
-
-			ColorFloatRGBA frgbaPointOnGrayLine = (r_frgbaTarget + fDeltaGray).ClampRGB();
-
-			float fDR = r_frgbaPixel.fR - frgbaPointOnGrayLine.fR;
-			float fDG = r_frgbaPixel.fG - frgbaPointOnGrayLine.fG;
-			float fDB = r_frgbaPixel.fB - frgbaPointOnGrayLine.fB;
-
-			return (fDR*fDR) + (fDG*fDG) + (fDB*fDB);
-		}
-
-		void SetEncodingBits_Selectors(void);
-
-		// intermediate encoding
-		bool			m_boolDiff;
-		bool			m_boolFlip;
-		ColorFloatRGBA	m_frgbaColor1;
-		ColorFloatRGBA	m_frgbaColor2;
-		unsigned int	m_uiCW1;
-		unsigned int	m_uiCW2;
-		unsigned int	m_auiSelectors[PIXELS];
-
-		// state shared between iterations
-		ColorFloatRGBA	m_frgbaSourceAverageLeft;
-		ColorFloatRGBA	m_frgbaSourceAverageRight;
-		ColorFloatRGBA	m_frgbaSourceAverageTop;
-		ColorFloatRGBA	m_frgbaSourceAverageBottom;
-		bool			m_boolMostLikelyFlip;
-
-		// stats
-		float			m_fError1;	// error for Etc1 half 1
-		float			m_fError2;	// error for Etc1 half 2
-		bool			m_boolSeverelyBentDifferentialColors;	// only valid if m_boolDiff;
-
-		// final encoding
-		Block4x4EncodingBits_RGB8 *m_pencodingbitsRGB8;		// or RGB8 portion of Block4x4EncodingBits_RGB8A8
-
-		private:
-
-		void CalculateSourceAverages(void);
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.cpp
deleted file mode 100644
index 4c012fbbf1..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.cpp
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_R11.cpp
-
-Block4x4Encoding_R11 is the encoder to use when targetting file format R11 and SR11 (signed R11).  
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_R11.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-
-	// modifier values to use for R11, SR11, RG11 and SRG11
-	float Block4x4Encoding_R11::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS]
-	{
-		{ -3.0f / 255.0f, -6.0f / 255.0f,  -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f },
-		{ -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 12.0f / 255.0f },
-		{ -2.0f / 255.0f, -4.0f / 255.0f,  -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f },
-
-		{ -3.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f },
-		{ -3.0f / 255.0f, -7.0f / 255.0f,  -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f },
-		{ -4.0f / 255.0f, -7.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
-		{ -3.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
-
-		{ -2.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -4.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
-
-		{ -3.0f / 255.0f, -4.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
-		{ -1.0f / 255.0f, -2.0f / 255.0f,  -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f,  9.0f / 255.0f },
-		{ -4.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f,  -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  8.0f / 255.0f },
-		{ -3.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f,  -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  8.0f / 255.0f }
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_R11::Block4x4Encoding_R11(void)
-	{
-
-		m_pencodingbitsR11 = nullptr;
-
-	}
-
-	Block4x4Encoding_R11::~Block4x4Encoding_R11(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_R11::InitFromSource(Block4x4 *a_pblockParent,
-		ColorFloatRGBA *a_pafrgbaSource,
-		unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
-	{
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-
-		m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)a_paucEncodingBits;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_R11::InitFromEncodingBits(Block4x4 *a_pblockParent,
-		unsigned char *a_paucEncodingBits,
-		ColorFloatRGBA *a_pafrgbaSource,
-		ErrorMetric a_errormetric)
-	{
-		m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)a_paucEncodingBits;
-
-		// init RGB portion
-		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
-			(unsigned char *)m_pencodingbitsR11,
-			a_pafrgbaSource,
-			a_errormetric);
-
-		// init R11 portion
-		{
-			m_mode = MODE_R11;
-			if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-			{
-				m_fRedBase = (float)(signed char)m_pencodingbitsR11->data.base;
-			}
-			else
-			{
-				m_fRedBase = (float)(unsigned char)m_pencodingbitsR11->data.base;
-			}
-			m_fRedMultiplier = (float)m_pencodingbitsR11->data.multiplier;
-			m_uiRedModifierTableIndex = m_pencodingbitsR11->data.table;
-
-			unsigned long long int ulliSelectorBits = 0;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors0 << 40;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors1 << 32;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors2 << 24;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors3 << 16;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors4 << 8;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors5;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				unsigned int uiShift = 45 - (3 * uiPixel);
-				m_auiRedSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (SELECTORS - 1);
-			}
-
-			// decode the red channel
-			// calc red error
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				float fDecodedPixelData = 0.0f;
-				if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || a_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-				{
-					fDecodedPixelData = DecodePixelRed(m_fRedBase, m_fRedMultiplier,
-						m_uiRedModifierTableIndex,
-						m_auiRedSelectors[uiPixel]);
-				}
-				else if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-				{
-					fDecodedPixelData = DecodePixelRed(m_fRedBase + 128, m_fRedMultiplier,
-						m_uiRedModifierTableIndex,
-						m_auiRedSelectors[uiPixel]);
-				}
-				else
-				{
-					assert(0);
-				}
-				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fDecodedPixelData, 0.0f, 0.0f, 1.0f);
-			}
-			CalcBlockError();
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_R11::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-		m_mode = MODE_R11;
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			m_fError = FLT_MAX;
-			m_fRedBlockError = FLT_MAX;		// artificially high value
-			CalculateR11(8, 0.0f, 0.0f);
-			m_fError = m_fRedBlockError;
-			break;
-
-		case 1:
-			CalculateR11(8, 2.0f, 1.0f);
-			m_fError = m_fRedBlockError;
-			if (a_fEffort <= 24.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 2:
-			CalculateR11(8, 12.0f, 1.0f);
-			m_fError = m_fRedBlockError;
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 3:
-			CalculateR11(7, 6.0f, 1.0f);
-			m_fError = m_fRedBlockError;
-			break;
-
-		case 4:
-			CalculateR11(6, 3.0f, 1.0f);
-			m_fError = m_fRedBlockError;
-			break;
-
-		case 5:
-			CalculateR11(5, 1.0f, 0.0f);
-			m_fError = m_fRedBlockError;
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find the best combination of base color, multiplier and selectors
-	//
-	// a_uiSelectorsUsed limits the number of selector combinations to try
-	// a_fBaseRadius limits the range of base colors to try
-	// a_fMultiplierRadius limits the range of multipliers to try
-	//
-	void Block4x4Encoding_R11::CalculateR11(unsigned int a_uiSelectorsUsed, 
-												float a_fBaseRadius, float a_fMultiplierRadius)
-	{
-		// maps from virtual (monotonic) selector to ETC selector
-		static const unsigned int auiVirtualSelectorMap[8] = {3, 2, 1, 0, 4, 5, 6, 7};
-
-		// find min/max red
-		float fMinRed = 1.0f;
-		float fMaxRed = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			// ignore border pixels
-			float fAlpha = m_pafrgbaSource[uiPixel].fA;
-			if (isnan(fAlpha))
-			{
-				continue;
-			}
-
-			float fRed = m_pafrgbaSource[uiPixel].fR;
-
-			if (fRed < fMinRed)
-			{
-				fMinRed = fRed;
-			}
-			if (fRed > fMaxRed)
-			{
-				fMaxRed = fRed;
-			}
-		}
-		assert(fMinRed <= fMaxRed);
-
-		float fRedRange = (fMaxRed - fMinRed);
-
-		// try each modifier table entry							  
-		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
-		{
-			for (unsigned int uiMinVirtualSelector = 0; 
-					uiMinVirtualSelector <= (8- a_uiSelectorsUsed); 
-					uiMinVirtualSelector++)
-			{
-				unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1;
-
-				unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector];
-				unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector];
-
-				float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector];
-
-				float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] -
-											s_aafModifierTable[uiTableEntry][uiMinSelector];
-
-				float fCenterRatio = fTableEntryCenter / fTableEntryRange;
-
-				float fCenter = fMinRed + fCenterRatio*fRedRange;
-				fCenter = roundf(255.0f * fCenter) / 255.0f;
-
-				float fMinBase = fCenter - (a_fBaseRadius / 255.0f);
-				if (fMinBase < 0.0f)
-				{
-					fMinBase = 0.0f;
-				}
-
-				float fMaxBase = fCenter + (a_fBaseRadius / 255.0f);
-				if (fMaxBase > 1.0f)
-				{
-					fMaxBase = 1.0f;
-				}
-
-				for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
-				{
-					float fRangeMultiplier = roundf(fRedRange / fTableEntryRange);
-
-					float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius;
-					if (fMinMultiplier < 1.0f)
-					{
-						fMinMultiplier = 0.0f;
-					}
-					else if (fMinMultiplier > 15.0f)
-					{
-						fMinMultiplier = 15.0f;
-					}
-
-					float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius;
-					if (fMaxMultiplier < 1.0f)
-					{
-						fMaxMultiplier = 1.0f;
-					}
-					else if (fMaxMultiplier > 15.0f)
-					{
-						fMaxMultiplier = 15.0f;
-					}
-
-					for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
-					{
-						// find best selector for each pixel
-						unsigned int auiBestSelectors[PIXELS];
-						float afBestRedError[PIXELS];
-						float afBestPixelRed[PIXELS];
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							float fBestPixelRedError = FLT_MAX;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								float fPixelRed = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector);
-
-								ColorFloatRGBA frgba(fPixelRed, m_pafrgbaSource[uiPixel].fG,0.0f,1.0f);
-
-								float fPixelRedError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]);
-
-								if (fPixelRedError < fBestPixelRedError)
-								{
-									fBestPixelRedError = fPixelRedError;
-									auiBestSelectors[uiPixel] = uiSelector;
-									afBestRedError[uiPixel] = fBestPixelRedError;
-									afBestPixelRed[uiPixel] = fPixelRed;
-								}
-							}
-						}
-						float fBlockError = 0.0f;  
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							fBlockError += afBestRedError[uiPixel];
-						}
-						if (fBlockError < m_fRedBlockError)
-						{
-							m_fRedBlockError = fBlockError;
-
-							if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-							{
-								m_fRedBase = 255.0f * fBase;
-							}
-							else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-							{
-								m_fRedBase = (fBase * 255) - 128;
-							}
-							else
-							{
-								assert(0);
-							}
-							m_fRedMultiplier = fMultiplier;
-							m_uiRedModifierTableIndex = uiTableEntry;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiRedSelectors[uiPixel] = auiBestSelectors[uiPixel];
-								float fBestPixelRed = afBestPixelRed[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fBestPixelRed, 0.0f, 0.0f, 1.0f);
-								m_afDecodedAlphas[uiPixel] = 1.0f;
-							}
-						}
-					}
-				}
-
-			}
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_R11::SetEncodingBits(void)
-	{
-		if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-		{
-			m_pencodingbitsR11->data.base = (unsigned char)roundf(m_fRedBase);
-		}
-		else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-		{
-			m_pencodingbitsR11->data.base = (signed char)roundf(m_fRedBase);
-		}
-		else
-		{
-			assert(0);
-		}
-		m_pencodingbitsR11->data.table = m_uiRedModifierTableIndex;
-		m_pencodingbitsR11->data.multiplier = (unsigned char)roundf(m_fRedMultiplier);
-
-		unsigned long long int ulliSelectorBits = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiShift = 45 - (3 * uiPixel);
-			ulliSelectorBits |= ((unsigned long long int)m_auiRedSelectors[uiPixel]) << uiShift;
-		}
-
-		m_pencodingbitsR11->data.selectors0 = ulliSelectorBits >> 40;
-		m_pencodingbitsR11->data.selectors1 = ulliSelectorBits >> 32;
-		m_pencodingbitsR11->data.selectors2 = ulliSelectorBits >> 24;
-		m_pencodingbitsR11->data.selectors3 = ulliSelectorBits >> 16;
-		m_pencodingbitsR11->data.selectors4 = ulliSelectorBits >> 8;
-		m_pencodingbitsR11->data.selectors5 = ulliSelectorBits;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.h
deleted file mode 100644
index b40c1e0036..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_RGB8.h"
-
-namespace Etc
-{
-	class Block4x4EncodingBits_R11;
-
-	// ################################################################################
-	// Block4x4Encoding_R11
-	// ################################################################################
-
-	class Block4x4Encoding_R11 : public Block4x4Encoding_RGB8
-	{
-	public:
-
-		Block4x4Encoding_R11(void);
-		virtual ~Block4x4Encoding_R11(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-			ColorFloatRGBA *a_pafrgbaSource,
-			unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-			unsigned char *a_paucEncodingBits,
-			ColorFloatRGBA *a_pafrgbaSource,
-			ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-		inline float GetRedBase(void) const
-		{
-			return m_fRedBase;
-		}
-
-		inline float GetRedMultiplier(void) const
-		{
-			return m_fRedMultiplier;
-		}
-
-		inline int GetRedTableIndex(void) const
-		{
-			return m_uiRedModifierTableIndex;
-		}
-
-		inline const unsigned int * GetRedSelectors(void) const
-		{
-			return m_auiRedSelectors;
-		}
-
-	protected:
-
-		static const unsigned int MODIFIER_TABLE_ENTRYS = 16;
-		static const unsigned int SELECTOR_BITS = 3;
-		static const unsigned int SELECTORS = 1 << SELECTOR_BITS;
-
-		static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS];
-
-		void CalculateR11(unsigned int a_uiSelectorsUsed, 
-							float a_fBaseRadius, float a_fMultiplierRadius);
-
-		
-
-	
-		inline float DecodePixelRed(float a_fBase, float a_fMultiplier,
-			unsigned int a_uiTableIndex, unsigned int a_uiSelector)
-		{
-			float fMultiplier = a_fMultiplier;
-			if (fMultiplier <= 0.0f)
-			{
-				fMultiplier = 1.0f / 8.0f;
-			}
-
-			float fPixelRed = a_fBase * 8 + 4 +
-				8 * fMultiplier*s_aafModifierTable[a_uiTableIndex][a_uiSelector]*255;
-			fPixelRed /= 2047.0f;
-
-			if (fPixelRed < 0.0f)
-			{
-				fPixelRed = 0.0f;
-			}
-			else if (fPixelRed > 1.0f)
-			{
-				fPixelRed = 1.0f;
-			}
-
-			return fPixelRed;
-		}
-
-		Block4x4EncodingBits_R11 *m_pencodingbitsR11;
-
-		float m_fRedBase;
-		float m_fRedMultiplier;
-		float m_fRedBlockError;
-		unsigned int m_uiRedModifierTableIndex;
-		unsigned int m_auiRedSelectors[PIXELS];
-
-		
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.cpp
deleted file mode 100644
index 417835db51..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.cpp
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_RG11.cpp
-
-Block4x4Encoding_RG11 is the encoder to use when targetting file format RG11 and SRG11 (signed RG11).
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_RG11.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_RG11::Block4x4Encoding_RG11(void)
-	{
-		m_pencodingbitsRG11 = nullptr;
-	}
-
-	Block4x4Encoding_RG11::~Block4x4Encoding_RG11(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_RG11::InitFromSource(Block4x4 *a_pblockParent,
-		ColorFloatRGBA *a_pafrgbaSource,
-		unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
-	{
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-
-		m_pencodingbitsRG11 = (Block4x4EncodingBits_RG11 *)a_paucEncodingBits;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_RG11::InitFromEncodingBits(Block4x4 *a_pblockParent,
-		unsigned char *a_paucEncodingBits,
-		ColorFloatRGBA *a_pafrgbaSource,
-		ErrorMetric a_errormetric)
-	{
-
-		m_pencodingbitsRG11 = (Block4x4EncodingBits_RG11 *)a_paucEncodingBits;
-
-		// init RGB portion
-		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
-			(unsigned char *)m_pencodingbitsRG11,
-			a_pafrgbaSource,
-			a_errormetric);
-		m_fError = 0.0f;
-
-		{
-			m_mode = MODE_RG11;
-			if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-			{
-				m_fRedBase = (float)(signed char)m_pencodingbitsRG11->data.baseR;
-				m_fGrnBase = (float)(signed char)m_pencodingbitsRG11->data.baseG;
-			}
-			else
-			{
-				m_fRedBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseR;
-				m_fGrnBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseG;
-			}
-			m_fRedMultiplier = (float)m_pencodingbitsRG11->data.multiplierR;
-			m_fGrnMultiplier = (float)m_pencodingbitsRG11->data.multiplierG;
-			m_uiRedModifierTableIndex = m_pencodingbitsRG11->data.tableIndexR;
-			m_uiGrnModifierTableIndex = m_pencodingbitsRG11->data.tableIndexG;
-
-			unsigned long long int ulliSelectorBitsR = 0;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR0 << 40;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR1 << 32;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR2 << 24;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR3 << 16;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR4 << 8;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR5;
-
-			unsigned long long int ulliSelectorBitsG = 0;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG0 << 40;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG1 << 32;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG2 << 24;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG3 << 16;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG4 << 8;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG5;
-
-			
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				unsigned int uiShift = 45 - (3 * uiPixel);
-				m_auiRedSelectors[uiPixel] = (ulliSelectorBitsR >> uiShift) & (SELECTORS - 1);
-				m_auiGrnSelectors[uiPixel] = (ulliSelectorBitsG >> uiShift) & (SELECTORS - 1);
-			}
-
-			
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				float fRedDecodedData = 0.0f;
-				float fGrnDecodedData = 0.0f;
-				if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-				{
-					fRedDecodedData = DecodePixelRed(m_fRedBase, m_fRedMultiplier, m_uiRedModifierTableIndex, m_auiRedSelectors[uiPixel]);
-					fGrnDecodedData = DecodePixelRed(m_fGrnBase, m_fGrnMultiplier, m_uiGrnModifierTableIndex, m_auiGrnSelectors[uiPixel]);
-				}
-				else if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-				{
-					fRedDecodedData = DecodePixelRed(m_fRedBase + 128, m_fRedMultiplier, m_uiRedModifierTableIndex, m_auiRedSelectors[uiPixel]);
-					fGrnDecodedData = DecodePixelRed(m_fGrnBase + 128, m_fGrnMultiplier, m_uiGrnModifierTableIndex, m_auiGrnSelectors[uiPixel]);
-				}
-				else
-				{
-					assert(0);
-				}
-				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fRedDecodedData, fGrnDecodedData, 0.0f, 1.0f);
-			}
-
-		}
-
-		CalcBlockError();
- 	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RG11::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			m_fError = FLT_MAX;
-			m_fGrnBlockError = FLT_MAX;		// artificially high value
-			m_fRedBlockError = FLT_MAX;
-			CalculateR11(8, 0.0f, 0.0f);
-			CalculateG11(8, 0.0f, 0.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			break;
-
-		case 1:
-			CalculateR11(8, 2.0f, 1.0f);
-			CalculateG11(8, 2.0f, 1.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			if (a_fEffort <= 24.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 2:
-			CalculateR11(8, 12.0f, 1.0f);
-			CalculateG11(8, 12.0f, 1.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 3:
-			CalculateR11(7, 6.0f, 1.0f);
-			CalculateG11(7, 6.0f, 1.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			break;
-
-		case 4:
-			CalculateR11(6, 3.0f, 1.0f);
-			CalculateG11(6, 3.0f, 1.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			break;
-
-		case 5:
-			CalculateR11(5, 1.0f, 0.0f);
-			CalculateG11(5, 1.0f, 0.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find the best combination of base color, multiplier and selectors
-	//
-	// a_uiSelectorsUsed limits the number of selector combinations to try
-	// a_fBaseRadius limits the range of base colors to try
-	// a_fMultiplierRadius limits the range of multipliers to try
-	//
-	void Block4x4Encoding_RG11::CalculateG11(unsigned int a_uiSelectorsUsed,
-		float a_fBaseRadius, float a_fMultiplierRadius)
-	{
-		// maps from virtual (monotonic) selector to etc selector
-		static const unsigned int auiVirtualSelectorMap[8] = { 3, 2, 1, 0, 4, 5, 6, 7 };
-
-		// find min/max Grn
-		float fMinGrn = 1.0f;
-		float fMaxGrn = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			// ignore border pixels
-			float fAlpha = m_pafrgbaSource[uiPixel].fA;
-			if (isnan(fAlpha))
-			{
-				continue;
-			}
-
-			float fGrn = m_pafrgbaSource[uiPixel].fG;
-
-			if (fGrn < fMinGrn)
-			{
-				fMinGrn = fGrn;
-			}
-			if (fGrn > fMaxGrn)
-			{
-				fMaxGrn = fGrn;
-			}
-		}
-		assert(fMinGrn <= fMaxGrn);
-
-		float fGrnRange = (fMaxGrn - fMinGrn);
-
-		// try each modifier table entry							  
-		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
-		{
-			for (unsigned int uiMinVirtualSelector = 0;
-			uiMinVirtualSelector <= (8 - a_uiSelectorsUsed);
-				uiMinVirtualSelector++)
-			{
-				unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1;
-
-				unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector];
-				unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector];
-
-				float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector];
-
-				float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] -
-					s_aafModifierTable[uiTableEntry][uiMinSelector];
-
-				float fCenterRatio = fTableEntryCenter / fTableEntryRange;
-
-				float fCenter = fMinGrn + fCenterRatio*fGrnRange;
-				fCenter = roundf(255.0f * fCenter) / 255.0f;
-
-				float fMinBase = fCenter - (a_fBaseRadius / 255.0f);
-				if (fMinBase < 0.0f)
-				{
-					fMinBase = 0.0f;
-				}
-
-				float fMaxBase = fCenter + (a_fBaseRadius / 255.0f);
-				if (fMaxBase > 1.0f)
-				{
-					fMaxBase = 1.0f;
-				}
-
-				for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
-				{
-					float fRangeMultiplier = roundf(fGrnRange / fTableEntryRange);
-
-					float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius;
-					if (fMinMultiplier < 1.0f)
-					{
-						fMinMultiplier = 0.0f;
-					}
-					else if (fMinMultiplier > 15.0f)
-					{
-						fMinMultiplier = 15.0f;
-					}
-
-					float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius;
-					if (fMaxMultiplier < 1.0f)
-					{
-						fMaxMultiplier = 1.0f;
-					}
-					else if (fMaxMultiplier > 15.0f)
-					{
-						fMaxMultiplier = 15.0f;
-					}
-
-					for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
-					{
-						// find best selector for each pixel
-						unsigned int auiBestSelectors[PIXELS];
-						float afBestGrnError[PIXELS];
-						float afBestPixelGrn[PIXELS];
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							float fBestPixelGrnError = FLT_MAX;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								//DecodePixelRed is not red channel specific
-								float fPixelGrn = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector);
-								
-								ColorFloatRGBA frgba(m_pafrgbaSource[uiPixel].fR, fPixelGrn, 0.0f, 1.0f);
-									
-								float fPixelGrnError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]);
-
-								if (fPixelGrnError < fBestPixelGrnError)
-								{
-									fBestPixelGrnError = fPixelGrnError;
-									auiBestSelectors[uiPixel] = uiSelector;
-									afBestGrnError[uiPixel] = fBestPixelGrnError;
-									afBestPixelGrn[uiPixel] = fPixelGrn;
-								}
-							}
-						}
-						float fBlockError = 0.0f;
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							fBlockError += afBestGrnError[uiPixel];
-						}
-
-						if (fBlockError < m_fGrnBlockError)
-						{
-							m_fGrnBlockError = fBlockError;
-
-							if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-							{
-								m_fGrnBase = 255.0f * fBase;
-							}
-							else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-							{
-								m_fGrnBase = (fBase * 255) - 128;
-							}
-							else
-							{
-								assert(0);
-							}
-							m_fGrnMultiplier = fMultiplier;
-							m_uiGrnModifierTableIndex = uiTableEntry;
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiGrnSelectors[uiPixel] = auiBestSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel].fG = afBestPixelGrn[uiPixel];
-								m_afDecodedAlphas[uiPixel] = 1.0f;
-							}
-						}
-					}
-				}
-
-			}
-		}
-	}
-	
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RG11::SetEncodingBits(void)
-	{
-		unsigned long long int ulliSelectorBitsR = 0;
-		unsigned long long int ulliSelectorBitsG = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiShift = 45 - (3 * uiPixel);
-			ulliSelectorBitsR |= ((unsigned long long int)m_auiRedSelectors[uiPixel]) << uiShift;
-			ulliSelectorBitsG |= ((unsigned long long int)m_auiGrnSelectors[uiPixel]) << uiShift;
-		}
-		if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-		{
-			m_pencodingbitsRG11->data.baseR = (unsigned char)roundf(m_fRedBase);
-		}
-		else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-		{
-			m_pencodingbitsRG11->data.baseR = (signed char)roundf(m_fRedBase);
-		}
-		else
-		{
-			assert(0);
-		}
-		m_pencodingbitsRG11->data.tableIndexR = m_uiRedModifierTableIndex;
-		m_pencodingbitsRG11->data.multiplierR = (unsigned char)roundf(m_fRedMultiplier);
-
-		m_pencodingbitsRG11->data.selectorsR0 = ulliSelectorBitsR >> 40;
-		m_pencodingbitsRG11->data.selectorsR1 = ulliSelectorBitsR >> 32;
-		m_pencodingbitsRG11->data.selectorsR2 = ulliSelectorBitsR >> 24;
-		m_pencodingbitsRG11->data.selectorsR3 = ulliSelectorBitsR >> 16;
-		m_pencodingbitsRG11->data.selectorsR4 = ulliSelectorBitsR >> 8;
-		m_pencodingbitsRG11->data.selectorsR5 = ulliSelectorBitsR;
-
-		if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-		{
-			m_pencodingbitsRG11->data.baseG = (unsigned char)roundf(m_fGrnBase);
-		}
-		else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-		{
-			m_pencodingbitsRG11->data.baseG = (signed char)roundf(m_fGrnBase);
-		}
-		else
-		{
-			assert(0);
-		}
-		m_pencodingbitsRG11->data.tableIndexG = m_uiGrnModifierTableIndex;
-		m_pencodingbitsRG11->data.multiplierG = (unsigned char)roundf(m_fGrnMultiplier);
-
-		m_pencodingbitsRG11->data.selectorsG0 = ulliSelectorBitsG >> 40;
-		m_pencodingbitsRG11->data.selectorsG1 = ulliSelectorBitsG >> 32;
-		m_pencodingbitsRG11->data.selectorsG2 = ulliSelectorBitsG >> 24;
-		m_pencodingbitsRG11->data.selectorsG3 = ulliSelectorBitsG >> 16;
-		m_pencodingbitsRG11->data.selectorsG4 = ulliSelectorBitsG >> 8;
-		m_pencodingbitsRG11->data.selectorsG5 = ulliSelectorBitsG;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.h
deleted file mode 100644
index d4993b8c5f..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_RGB8.h"
-#include "EtcBlock4x4Encoding_R11.h"
-
-namespace Etc
-{
-	class Block4x4EncodingBits_RG11;
-
-	// ################################################################################
-	// Block4x4Encoding_RG11
-	// ################################################################################
-
-	class Block4x4Encoding_RG11 : public Block4x4Encoding_R11
-	{
-		float m_fGrnBase;
-		float m_fGrnMultiplier;
-		float m_fGrnBlockError;
-		unsigned int m_auiGrnSelectors[PIXELS];
-		unsigned int m_uiGrnModifierTableIndex;
-	public:
-
-		Block4x4Encoding_RG11(void);
-		virtual ~Block4x4Encoding_RG11(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-			ColorFloatRGBA *a_pafrgbaSource,
-
-			unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-			unsigned char *a_paucEncodingBits,
-			ColorFloatRGBA *a_pafrgbaSource,
-
-			ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-		Block4x4EncodingBits_RG11 *m_pencodingbitsRG11;
-
-		void CalculateG11(unsigned int a_uiSelectorsUsed, float a_fBaseRadius, float a_fMultiplierRadius);
-
-		inline float GetGrnBase(void) const
-		{
-			return m_fGrnBase;
-		}
-
-		inline float GetGrnMultiplier(void) const
-		{
-			return m_fGrnMultiplier;
-		}
-
-		inline int GetGrnTableIndex(void) const
-		{
-			return m_uiGrnModifierTableIndex;
-		}
-
-		inline const unsigned int * GetGrnSelectors(void) const
-		{
-			return m_auiGrnSelectors;
-		}
-
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
deleted file mode 100644
index 5c7ebed788..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
+++ /dev/null
@@ -1,1730 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_RGB8.cpp
-
-Block4x4Encoding_RGB8 is the encoder to use for the ETC2 extensions when targetting file format RGB8.  
-This encoder is also used for the ETC2 subset of file format RGBA8.
-
-Block4x4Encoding_ETC1 encodes the ETC1 subset of RGB8.
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_RGB8.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-#include "EtcMath.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-	float Block4x4Encoding_RGB8::s_afTHDistanceTable[TH_DISTANCES] =
-	{
-		3.0f / 255.0f,
-		6.0f / 255.0f,
-		11.0f / 255.0f,
-		16.0f / 255.0f,
-		23.0f / 255.0f,
-		32.0f / 255.0f,
-		41.0f / 255.0f,
-		64.0f / 255.0f
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_RGB8::Block4x4Encoding_RGB8(void)
-	{
-
-		m_pencodingbitsRGB8 = nullptr;
-
-	}
-
-	Block4x4Encoding_RGB8::~Block4x4Encoding_RGB8(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_RGB8::InitFromEncodingBits(Block4x4 *a_pblockParent,
-														unsigned char *a_paucEncodingBits,
-														ColorFloatRGBA *a_pafrgbaSource,
-														ErrorMetric a_errormetric)
-	{
-		
-		// handle ETC1 modes
-		Block4x4Encoding_ETC1::InitFromEncodingBits(a_pblockParent,
-													a_paucEncodingBits, a_pafrgbaSource,a_errormetric);
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
-
-		// detect if there is a T, H or Planar mode present
-		if (m_pencodingbitsRGB8->differential.diff)
-		{
-			int iRed1 = (int)m_pencodingbitsRGB8->differential.red1;
-			int iDRed2 = m_pencodingbitsRGB8->differential.dred2;
-			int iRed2 = iRed1 + iDRed2;
-
-			int iGreen1 = (int)m_pencodingbitsRGB8->differential.green1;
-			int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2;
-			int iGreen2 = iGreen1 + iDGreen2;
-
-			int iBlue1 = (int)m_pencodingbitsRGB8->differential.blue1;
-			int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2;
-			int iBlue2 = iBlue1 + iDBlue2;
-
-			if (iRed2 < 0 || iRed2 > 31)
-			{
-				InitFromEncodingBits_T();
-			}
-			else if (iGreen2 < 0 || iGreen2 > 31)
-			{
-				InitFromEncodingBits_H();
-			}
-			else if (iBlue2 < 0 || iBlue2 > 31)
-			{
-				InitFromEncodingBits_Planar();
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if T mode is detected
-	//
-	void Block4x4Encoding_RGB8::InitFromEncodingBits_T(void)
-	{
-
-		m_mode = MODE_T;
-
-		unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) +
-								m_pencodingbitsRGB8->t.red1b);
-		unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1;
-		unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1;
-
-		unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2;
-		unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2;
-		unsigned char ucBlue2 = m_pencodingbitsRGB8->t.blue2;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);
-
-		m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db;
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		DecodePixels_T();
-
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if H mode is detected
-	//
-	void Block4x4Encoding_RGB8::InitFromEncodingBits_H(void)
-	{
-
-		m_mode = MODE_H;
-		
-		unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1;
-		unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) +
-									m_pencodingbitsRGB8->h.green1b);
-		unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) +
-								(m_pencodingbitsRGB8->h.blue1b << 1) + 
-								m_pencodingbitsRGB8->h.blue1c);
-
-		unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2;
-		unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) +
-									m_pencodingbitsRGB8->h.green2b);
-		unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);
-
-		// used to determine the LSB of the CW
-		unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1);
-		unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2);
-
-		m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1);
-		if (uiRGB1 >= uiRGB2)
-		{
-			m_uiCW1++;
-		}
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		DecodePixels_H();
-
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if Planar mode is detected
-	//
-	void Block4x4Encoding_RGB8::InitFromEncodingBits_Planar(void)
-	{
-
-		m_mode = MODE_PLANAR;
-
-		unsigned char ucOriginRed = m_pencodingbitsRGB8->planar.originRed;
-		unsigned char ucOriginGreen = (unsigned char)((m_pencodingbitsRGB8->planar.originGreen1 << 6) +
-										m_pencodingbitsRGB8->planar.originGreen2);
-		unsigned char ucOriginBlue = (unsigned char)((m_pencodingbitsRGB8->planar.originBlue1 << 5) +
-										(m_pencodingbitsRGB8->planar.originBlue2 << 3) +
-										(m_pencodingbitsRGB8->planar.originBlue3 << 1) +
-										m_pencodingbitsRGB8->planar.originBlue4);
-
-		unsigned char ucHorizRed = (unsigned char)((m_pencodingbitsRGB8->planar.horizRed1 << 1) +
-									m_pencodingbitsRGB8->planar.horizRed2);
-		unsigned char ucHorizGreen = m_pencodingbitsRGB8->planar.horizGreen;
-		unsigned char ucHorizBlue = (unsigned char)((m_pencodingbitsRGB8->planar.horizBlue1 << 5) +
-									m_pencodingbitsRGB8->planar.horizBlue2);
-
-		unsigned char ucVertRed = (unsigned char)((m_pencodingbitsRGB8->planar.vertRed1 << 3) +
-									m_pencodingbitsRGB8->planar.vertRed2);
-		unsigned char ucVertGreen = (unsigned char)((m_pencodingbitsRGB8->planar.vertGreen1 << 2) +
-									m_pencodingbitsRGB8->planar.vertGreen2);
-		unsigned char ucVertBlue = m_pencodingbitsRGB8->planar.vertBlue;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromR6G7B6(ucOriginRed, ucOriginGreen, ucOriginBlue);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromR6G7B6(ucHorizRed, ucHorizGreen, ucHorizBlue);
-		m_frgbaColor3 = ColorFloatRGBA::ConvertFromR6G7B6(ucVertRed, ucVertGreen, ucVertBlue);
-
-		DecodePixels_Planar();
-
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGB8::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			Block4x4Encoding_ETC1::PerformFirstIteration();
-			if (m_boolDone)
-			{
-				break;
-			}
-			TryPlanar(0);
-			SetDoneIfPerfect();
-			if (m_boolDone)
-			{
-				break;
-			}
-			TryTAndH(0);
-			break;
-
-		case 1:
-			Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 2:
-			Block4x4Encoding_ETC1::TryIndividual(m_boolMostLikelyFlip, 1);
-			break;
-
-		case 3:
-			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 4:
-			Block4x4Encoding_ETC1::TryIndividual(!m_boolMostLikelyFlip, 1);
-			break;
-
-		case 5:
-			TryPlanar(1);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 6:
-			TryTAndH(1);
-			if (a_fEffort <= 59.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 7:
-			Block4x4Encoding_ETC1::TryDegenerates1();
-			if (a_fEffort <= 69.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 8:
-			Block4x4Encoding_ETC1::TryDegenerates2();
-			if (a_fEffort <= 79.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 9:
-			Block4x4Encoding_ETC1::TryDegenerates3();
-			if (a_fEffort <= 89.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 10:
-			Block4x4Encoding_ETC1::TryDegenerates4();
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in Planar mode
-	// save this encoding if it improves the error
-	//
-	void Block4x4Encoding_RGB8::TryPlanar(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_PLANAR;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-		}
-
-		encodingTry.CalculatePlanarCornerColors();
-
-		encodingTry.DecodePixels_Planar();
-
-		encodingTry.CalcBlockError();
-
-		if (a_uiRadius > 0)
-		{
-			encodingTry.TwiddlePlanar();
-		}
-
-		if (encodingTry.m_fError < m_fError)
-		{
-			m_mode = MODE_PLANAR;
-			m_boolDiff = true;
-			m_boolFlip = false;
-			m_frgbaColor1 = encodingTry.m_frgbaColor1;
-			m_frgbaColor2 = encodingTry.m_frgbaColor2;
-			m_frgbaColor3 = encodingTry.m_frgbaColor3;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-			}
-
-			m_fError = encodingTry.m_fError;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in T mode or H mode
-	// save this encoding if it improves the error
-	//
-	void Block4x4Encoding_RGB8::TryTAndH(unsigned int a_uiRadius)
-	{
-
-		CalculateBaseColorsForTAndH();
-
-		TryT(a_uiRadius);
-
-		TryH(a_uiRadius);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate original values for base colors
-	// store them in m_frgbaOriginalColor1 and m_frgbaOriginalColor2
-	//
-	void Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH(void)
-	{
-
-		bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == ErrorMetric::RGBX;
-
-		ColorFloatRGBA frgbaBlockAverage = (m_frgbaSourceAverageLeft + m_frgbaSourceAverageRight) * 0.5f;
-
-		// find pixel farthest from average gray line
-		unsigned int uiFarthestPixel = 0;
-		float fFarthestGrayDistance2 = 0.0f;
-		unsigned int uiTransparentPixels = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			// don't count transparent
-			if (m_pafrgbaSource[uiPixel].fA == 0.0f && !boolRGBX)
-			{
-				uiTransparentPixels++;
-			}
-			else
-			{
-				float fGrayDistance2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], frgbaBlockAverage);
-
-				if (fGrayDistance2 > fFarthestGrayDistance2)
-				{
-					uiFarthestPixel = uiPixel;
-					fFarthestGrayDistance2 = fGrayDistance2;
-				}
-			}
-		}
-		// a transparent block should not reach this method
-		assert(uiTransparentPixels < PIXELS);
-
-		// set the original base colors to:
-		//		half way to the farthest pixel and
-		//		the mirror color on the other side of the average
-		ColorFloatRGBA frgbaOffset = (m_pafrgbaSource[uiFarthestPixel] - frgbaBlockAverage) * 0.5f;
-		m_frgbaOriginalColor1_TAndH = (frgbaBlockAverage + frgbaOffset).QuantizeR4G4B4();
-		m_frgbaOriginalColor2_TAndH = (frgbaBlockAverage - frgbaOffset).ClampRGB().QuantizeR4G4B4();	// the "other side" might be out of range
-
-		// move base colors to find best fit
-		for (unsigned int uiIteration = 0; uiIteration < 10; uiIteration++)
-		{
-			// find the center of pixels closest to each color
-			float fPixelsCloserToColor1 = 0.0f;
-			ColorFloatRGBA frgbSumPixelsCloserToColor1;
-			float fPixelsCloserToColor2 = 0.0f;
-			ColorFloatRGBA frgbSumPixelsCloserToColor2;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				// don't count transparent pixels
-				if (m_pafrgbaSource[uiPixel].fA == 0.0f)
-				{
-					continue;
-				}
-
-				float fGrayDistance2ToColor1 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor1_TAndH);
-				float fGrayDistance2ToColor2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor2_TAndH);
-
-				ColorFloatRGBA frgbaAlphaWeightedSource = m_pafrgbaSource[uiPixel] * m_pafrgbaSource[uiPixel].fA;
-					
-				if (fGrayDistance2ToColor1 <= fGrayDistance2ToColor2)
-				{
-					fPixelsCloserToColor1 += m_pafrgbaSource[uiPixel].fA;
-					frgbSumPixelsCloserToColor1 = frgbSumPixelsCloserToColor1 + frgbaAlphaWeightedSource;
-				}
-				else
-				{
-					fPixelsCloserToColor2 += m_pafrgbaSource[uiPixel].fA;
-					frgbSumPixelsCloserToColor2 = frgbSumPixelsCloserToColor2 + frgbaAlphaWeightedSource;
-				}
-			}
-			if (fPixelsCloserToColor1 == 0.0f || fPixelsCloserToColor2 == 0.0f)
-			{
-				break;
-			}
-
-			ColorFloatRGBA frgbAvgColor1Pixels = (frgbSumPixelsCloserToColor1 * (1.0f / fPixelsCloserToColor1)).QuantizeR4G4B4();
-			ColorFloatRGBA frgbAvgColor2Pixels = (frgbSumPixelsCloserToColor2 * (1.0f / fPixelsCloserToColor2)).QuantizeR4G4B4();
-
-			if (frgbAvgColor1Pixels.fR == m_frgbaOriginalColor1_TAndH.fR &&
-				frgbAvgColor1Pixels.fG == m_frgbaOriginalColor1_TAndH.fG &&
-				frgbAvgColor1Pixels.fB == m_frgbaOriginalColor1_TAndH.fB &&
-				frgbAvgColor2Pixels.fR == m_frgbaOriginalColor2_TAndH.fR &&
-				frgbAvgColor2Pixels.fG == m_frgbaOriginalColor2_TAndH.fG &&
-				frgbAvgColor2Pixels.fB == m_frgbaOriginalColor2_TAndH.fB)
-			{
-				break;
-			}
-
-			m_frgbaOriginalColor1_TAndH = frgbAvgColor1Pixels;
-			m_frgbaOriginalColor2_TAndH = frgbAvgColor2Pixels;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in T mode
-	// save this encoding if it improves the error
-	//
-	// since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently
-	// better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower
-	//
-	void Block4x4Encoding_RGB8::TryT(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_T;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-			encodingTry.m_fError = FLT_MAX;
-		}
-
-		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
-		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
-		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
-
-		int iMinRed1 = iColor1Red - (int)a_uiRadius;
-		if (iMinRed1 < 0)
-		{
-			iMinRed1 = 0;
-		}
-		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
-		if (iMaxRed1 > 15)
-		{
-			iMaxRed1 = 15;
-		}
-
-		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-		if (iMinGreen1 < 0)
-		{
-			iMinGreen1 = 0;
-		}
-		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
-		if (iMaxGreen1 > 15)
-		{
-			iMaxGreen1 = 15;
-		}
-
-		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-		if (iMinBlue1 < 0)
-		{
-			iMinBlue1 = 0;
-		}
-		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
-		if (iMaxBlue1 > 15)
-		{
-			iMaxBlue1 = 15;
-		}
-
-		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
-		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
-
-		int iMinRed2 = iColor2Red - (int)a_uiRadius;
-		if (iMinRed2 < 0)
-		{
-			iMinRed2 = 0;
-		}
-		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
-		if (iMaxRed2 > 15)
-		{
-			iMaxRed2 = 15;
-		}
-
-		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-		if (iMinGreen2 < 0)
-		{
-			iMinGreen2 = 0;
-		}
-		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
-		if (iMaxGreen2 > 15)
-		{
-			iMaxGreen2 = 15;
-		}
-
-		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-		if (iMinBlue2 < 0)
-		{
-			iMinBlue2 = 0;
-		}
-		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
-		if (iMaxBlue2 > 15)
-		{
-			iMaxBlue2 = 15;
-		}
-
-		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-		{
-			encodingTry.m_uiCW1 = uiDistance;
-
-			// twiddle m_frgbaOriginalColor2_TAndH
-			// twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector
-			//
-			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
-			{
-				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
-				{
-					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
-					{
-						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
-						{
-							if (uiBaseColorSwaps == 0)
-							{
-								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
-								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-							}
-							else
-							{
-								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH;
-							}
-
-							encodingTry.TryT_BestSelectorCombination();
-
-							if (encodingTry.m_fError < m_fError)
-							{
-								m_mode = encodingTry.m_mode;
-								m_boolDiff = encodingTry.m_boolDiff;
-								m_boolFlip = encodingTry.m_boolFlip;
-
-								m_frgbaColor1 = encodingTry.m_frgbaColor1;
-								m_frgbaColor2 = encodingTry.m_frgbaColor2;
-								m_uiCW1 = encodingTry.m_uiCW1;
-
-								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-								{
-									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-								}
-
-								m_fError = encodingTry.m_fError;
-							}
-						}
-					}
-				}
-			}
-
-			// twiddle m_frgbaOriginalColor1_TAndH
-			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
-			{
-				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
-				{
-					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
-					{
-						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
-						{
-							if (uiBaseColorSwaps == 0)
-							{
-								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
-							}
-							else
-							{
-								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH;
-								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-							}
-
-							encodingTry.TryT_BestSelectorCombination();
-
-							if (encodingTry.m_fError < m_fError)
-							{
-								m_mode = encodingTry.m_mode;
-								m_boolDiff = encodingTry.m_boolDiff;
-								m_boolFlip = encodingTry.m_boolFlip;
-
-								m_frgbaColor1 = encodingTry.m_frgbaColor1;
-								m_frgbaColor2 = encodingTry.m_frgbaColor2;
-								m_uiCW1 = encodingTry.m_uiCW1;
-
-								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-								{
-									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-								}
-
-								m_fError = encodingTry.m_fError;
-							}
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best selector combination for TryT
-	// called on an encodingTry
-	//
-	void Block4x4Encoding_RGB8::TryT_BestSelectorCombination(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-
-		unsigned int auiBestPixelSelectors[PIXELS];
-		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-			FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
-		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
-		
-		assert(SELECTORS == 4);
-		afrgbaDecodedPixel[0] = m_frgbaColor1;
-		afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[2] = m_frgbaColor2;
-		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
-		
-		// try each selector
-		for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-		{
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-
-				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
-														m_pafrgbaSource[uiPixel]);
-
-				if (fPixelError < afBestPixelErrors[uiPixel])
-				{
-					afBestPixelErrors[uiPixel] = fPixelError;
-					auiBestPixelSelectors[uiPixel] = uiSelector;
-					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
-				}
-			}
-		}
-		
-
-		// add up all of the pixel errors
-		float fBlockError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			fBlockError += afBestPixelErrors[uiPixel];
-		}
-
-		if (fBlockError < m_fError)
-		{
-			m_fError = fBlockError;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
-				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in T mode
-	// save this encoding if it improves the error
-	//
-	// since all pixels use the distance table, color1 and color2 can NOT be twiddled independently
-	// TWIDDLE_RADIUS of 2 is WAY too slow
-	//
-	void Block4x4Encoding_RGB8::TryH(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_H;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-			encodingTry.m_fError = FLT_MAX;
-		}
-
-		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
-		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
-		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
-
-		int iMinRed1 = iColor1Red - (int)a_uiRadius;
-		if (iMinRed1 < 0)
-		{
-			iMinRed1 = 0;
-		}
-		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
-		if (iMaxRed1 > 15)
-		{
-			iMaxRed1 = 15;
-		}
-
-		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-		if (iMinGreen1 < 0)
-		{
-			iMinGreen1 = 0;
-		}
-		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
-		if (iMaxGreen1 > 15)
-		{
-			iMaxGreen1 = 15;
-		}
-
-		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-		if (iMinBlue1 < 0)
-		{
-			iMinBlue1 = 0;
-		}
-		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
-		if (iMaxBlue1 > 15)
-		{
-			iMaxBlue1 = 15;
-		}
-
-		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
-		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
-
-		int iMinRed2 = iColor2Red - (int)a_uiRadius;
-		if (iMinRed2 < 0)
-		{
-			iMinRed2 = 0;
-		}
-		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
-		if (iMaxRed2 > 15)
-		{
-			iMaxRed2 = 15;
-		}
-
-		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-		if (iMinGreen2 < 0)
-		{
-			iMinGreen2 = 0;
-		}
-		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
-		if (iMaxGreen2 > 15)
-		{
-			iMaxGreen2 = 15;
-		}
-
-		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-		if (iMinBlue2 < 0)
-		{
-			iMinBlue2 = 0;
-		}
-		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
-		if (iMaxBlue2 > 15)
-		{
-			iMaxBlue2 = 15;
-		}
-
-		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-		{
-			encodingTry.m_uiCW1 = uiDistance;
-
-			// twiddle m_frgbaOriginalColor1_TAndH
-			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
-			{
-				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
-				{
-					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
-					{
-						encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-						encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
-
-						// if color1 == color2, H encoding issues can pop up, so abort
-						if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue)
-						{
-							continue;
-						}
-
-						encodingTry.TryH_BestSelectorCombination();
-
-						if (encodingTry.m_fError < m_fError)
-						{
-							m_mode = encodingTry.m_mode;
-							m_boolDiff = encodingTry.m_boolDiff;
-							m_boolFlip = encodingTry.m_boolFlip;
-
-							m_frgbaColor1 = encodingTry.m_frgbaColor1;
-							m_frgbaColor2 = encodingTry.m_frgbaColor2;
-							m_uiCW1 = encodingTry.m_uiCW1;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-							}
-
-							m_fError = encodingTry.m_fError;
-						}
-					}
-				}
-			}
-
-			// twiddle m_frgbaOriginalColor2_TAndH
-			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
-			{
-				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
-				{
-					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
-					{
-						encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
-						encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-
-						// if color1 == color2, H encoding issues can pop up, so abort
-						if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue)
-						{
-							continue;
-						}
-
-						encodingTry.TryH_BestSelectorCombination();
-
-						if (encodingTry.m_fError < m_fError)
-						{
-							m_mode = encodingTry.m_mode;
-							m_boolDiff = encodingTry.m_boolDiff;
-							m_boolFlip = encodingTry.m_boolFlip;
-
-							m_frgbaColor1 = encodingTry.m_frgbaColor1;
-							m_frgbaColor2 = encodingTry.m_frgbaColor2;
-							m_uiCW1 = encodingTry.m_uiCW1;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-							}
-
-							m_fError = encodingTry.m_fError;
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best selector combination for TryH
-	// called on an encodingTry
-	//
-	void Block4x4Encoding_RGB8::TryH_BestSelectorCombination(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-
-		unsigned int auiBestPixelSelectors[PIXELS];
-		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-			FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
-		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
-		
-		assert(SELECTORS == 4);
-		afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB();
-		afrgbaDecodedPixel[2] = (m_frgbaColor2 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
-		
-		// try each selector
-		for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-		{
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-
-				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
-														m_pafrgbaSource[uiPixel]);
-
-				if (fPixelError < afBestPixelErrors[uiPixel])
-				{
-					afBestPixelErrors[uiPixel] = fPixelError;
-					auiBestPixelSelectors[uiPixel] = uiSelector;
-					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
-				}
-			}
-		}
-		
-
-		// add up all of the pixel errors
-		float fBlockError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			fBlockError += afBestPixelErrors[uiPixel];
-		}
-
-		if (fBlockError < m_fError)
-		{
-			m_fError = fBlockError;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
-				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// use linear regression to find the best fit for colors along the edges of the 4x4 block
-	//
-	void Block4x4Encoding_RGB8::CalculatePlanarCornerColors(void)
-	{
-		ColorFloatRGBA afrgbaRegression[MAX_PLANAR_REGRESSION_SIZE];
-		ColorFloatRGBA frgbaSlope;
-		ColorFloatRGBA frgbaOffset;
-
-		// top edge
-		afrgbaRegression[0] = m_pafrgbaSource[0];
-		afrgbaRegression[1] = m_pafrgbaSource[4];
-		afrgbaRegression[2] = m_pafrgbaSource[8];
-		afrgbaRegression[3] = m_pafrgbaSource[12];
-		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
-		m_frgbaColor1 = frgbaOffset;
-		m_frgbaColor2 = (frgbaSlope * 4.0f) + frgbaOffset;
-
-		// left edge
-		afrgbaRegression[0] = m_pafrgbaSource[0];
-		afrgbaRegression[1] = m_pafrgbaSource[1];
-		afrgbaRegression[2] = m_pafrgbaSource[2];
-		afrgbaRegression[3] = m_pafrgbaSource[3];
-		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
-		m_frgbaColor1 = (m_frgbaColor1 + frgbaOffset) * 0.5f;		// average with top edge
-		m_frgbaColor3 = (frgbaSlope * 4.0f) + frgbaOffset;
-
-		// right edge
-		afrgbaRegression[0] = m_pafrgbaSource[12];
-		afrgbaRegression[1] = m_pafrgbaSource[13];
-		afrgbaRegression[2] = m_pafrgbaSource[14];
-		afrgbaRegression[3] = m_pafrgbaSource[15];
-		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
-		m_frgbaColor2 = (m_frgbaColor2 + frgbaOffset) * 0.5f;		// average with top edge
-
-		// bottom edge
-		afrgbaRegression[0] = m_pafrgbaSource[3];
-		afrgbaRegression[1] = m_pafrgbaSource[7];
-		afrgbaRegression[2] = m_pafrgbaSource[11];
-		afrgbaRegression[3] = m_pafrgbaSource[15];
-		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
-		m_frgbaColor3 = (m_frgbaColor3 + frgbaOffset) * 0.5f;		// average with left edge
-
-		// quantize corner colors to 6/7/6
-		m_frgbaColor1 = m_frgbaColor1.QuantizeR6G7B6();
-		m_frgbaColor2 = m_frgbaColor2.QuantizeR6G7B6();
-		m_frgbaColor3 = m_frgbaColor3.QuantizeR6G7B6();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try different corner colors by slightly changing R, G and B independently
-	//
-	// R, G and B decoding and errors are independent, so R, G and B twiddles can be independent
-	//
-	// return true if improvement
-	//
-	bool Block4x4Encoding_RGB8::TwiddlePlanar(void)
-	{
-		bool boolImprovement = false;
-
-		while (TwiddlePlanarR())
-		{
-			boolImprovement = true;
-		}
-
-		while (TwiddlePlanarG())
-		{
-			boolImprovement = true;
-		}
-
-		while (TwiddlePlanarB())
-		{
-			boolImprovement = true;
-		}
-
-		return boolImprovement;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try different corner colors by slightly changing R
-	//
-	bool Block4x4Encoding_RGB8::TwiddlePlanarR()
-	{
-		bool boolImprovement = false;
-
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_PLANAR;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-		}
-
-		int iOriginRed = encodingTry.m_frgbaColor1.IntRed(63.0f);
-		int iHorizRed = encodingTry.m_frgbaColor2.IntRed(63.0f);
-		int iVertRed = encodingTry.m_frgbaColor3.IntRed(63.0f);
-
-		for (int iTryOriginRed = iOriginRed - 1; iTryOriginRed <= iOriginRed + 1; iTryOriginRed++)
-		{
-			// check for out of range
-			if (iTryOriginRed < 0 || iTryOriginRed > 63)
-			{
-				continue;
-			}
-
-			encodingTry.m_frgbaColor1.fR = ((iTryOriginRed << 2) + (iTryOriginRed >> 4)) / 255.0f;
-
-			for (int iTryHorizRed = iHorizRed - 1; iTryHorizRed <= iHorizRed + 1; iTryHorizRed++)
-			{
-				// check for out of range
-				if (iTryHorizRed < 0 || iTryHorizRed > 63)
-				{
-					continue;
-				}
-
-				encodingTry.m_frgbaColor2.fR = ((iTryHorizRed << 2) + (iTryHorizRed >> 4)) / 255.0f;
-
-				for (int iTryVertRed = iVertRed - 1; iTryVertRed <= iVertRed + 1; iTryVertRed++)
-				{
-					// check for out of range
-					if (iTryVertRed < 0 || iTryVertRed > 63)
-					{
-						continue;
-					}
-
-					// don't bother with null twiddle
-					if (iTryOriginRed == iOriginRed && iTryHorizRed == iHorizRed && iTryVertRed == iVertRed)
-					{
-						continue;
-					}
-
-					encodingTry.m_frgbaColor3.fR = ((iTryVertRed << 2) + (iTryVertRed >> 4)) / 255.0f;
-
-					encodingTry.DecodePixels_Planar();
-
-					encodingTry.CalcBlockError();
-
-					if (encodingTry.m_fError < m_fError)
-					{
-						m_mode = MODE_PLANAR;
-						m_boolDiff = true;
-						m_boolFlip = false;
-						m_frgbaColor1 = encodingTry.m_frgbaColor1;
-						m_frgbaColor2 = encodingTry.m_frgbaColor2;
-						m_frgbaColor3 = encodingTry.m_frgbaColor3;
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-						}
-
-						m_fError = encodingTry.m_fError;
-
-						boolImprovement = true;
-					}
-				}
-			}
-		}
-
-		return boolImprovement;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try different corner colors by slightly changing G
-	//
-	bool Block4x4Encoding_RGB8::TwiddlePlanarG()
-	{
-		bool boolImprovement = false;
-
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_PLANAR;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-		}
-
-		int iOriginGreen = encodingTry.m_frgbaColor1.IntGreen(127.0f);
-		int iHorizGreen = encodingTry.m_frgbaColor2.IntGreen(127.0f);
-		int iVertGreen = encodingTry.m_frgbaColor3.IntGreen(127.0f);
-
-		for (int iTryOriginGreen = iOriginGreen - 1; iTryOriginGreen <= iOriginGreen + 1; iTryOriginGreen++)
-		{
-			// check for out of range
-			if (iTryOriginGreen < 0 || iTryOriginGreen > 127)
-			{
-				continue;
-			}
-
-			encodingTry.m_frgbaColor1.fG = ((iTryOriginGreen << 1) + (iTryOriginGreen >> 6)) / 255.0f;
-
-			for (int iTryHorizGreen = iHorizGreen - 1; iTryHorizGreen <= iHorizGreen + 1; iTryHorizGreen++)
-			{
-				// check for out of range
-				if (iTryHorizGreen < 0 || iTryHorizGreen > 127)
-				{
-					continue;
-				}
-
-				encodingTry.m_frgbaColor2.fG = ((iTryHorizGreen << 1) + (iTryHorizGreen >> 6)) / 255.0f;
-
-				for (int iTryVertGreen = iVertGreen - 1; iTryVertGreen <= iVertGreen + 1; iTryVertGreen++)
-				{
-					// check for out of range
-					if (iTryVertGreen < 0 || iTryVertGreen > 127)
-					{
-						continue;
-					}
-
-					// don't bother with null twiddle
-					if (iTryOriginGreen == iOriginGreen && 
-						iTryHorizGreen == iHorizGreen && 
-						iTryVertGreen == iVertGreen)
-					{
-						continue;
-					}
-
-					encodingTry.m_frgbaColor3.fG = ((iTryVertGreen << 1) + (iTryVertGreen >> 6)) / 255.0f;
-
-					encodingTry.DecodePixels_Planar();
-
-					encodingTry.CalcBlockError();
-
-					if (encodingTry.m_fError < m_fError)
-					{
-						m_mode = MODE_PLANAR;
-						m_boolDiff = true;
-						m_boolFlip = false;
-						m_frgbaColor1 = encodingTry.m_frgbaColor1;
-						m_frgbaColor2 = encodingTry.m_frgbaColor2;
-						m_frgbaColor3 = encodingTry.m_frgbaColor3;
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-						}
-
-						m_fError = encodingTry.m_fError;
-
-						boolImprovement = true;
-					}
-				}
-			}
-		}
-
-		return boolImprovement;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try different corner colors by slightly changing B
-	//
-	bool Block4x4Encoding_RGB8::TwiddlePlanarB()
-	{
-		bool boolImprovement = false;
-
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_PLANAR;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-		}
-
-		int iOriginBlue = encodingTry.m_frgbaColor1.IntBlue(63.0f);
-		int iHorizBlue = encodingTry.m_frgbaColor2.IntBlue(63.0f);
-		int iVertBlue = encodingTry.m_frgbaColor3.IntBlue(63.0f);
-
-		for (int iTryOriginBlue = iOriginBlue - 1; iTryOriginBlue <= iOriginBlue + 1; iTryOriginBlue++)
-		{
-			// check for out of range
-			if (iTryOriginBlue < 0 || iTryOriginBlue > 63)
-			{
-				continue;
-			}
-
-			encodingTry.m_frgbaColor1.fB = ((iTryOriginBlue << 2) + (iTryOriginBlue >> 4)) / 255.0f;
-
-			for (int iTryHorizBlue = iHorizBlue - 1; iTryHorizBlue <= iHorizBlue + 1; iTryHorizBlue++)
-			{
-				// check for out of range
-				if (iTryHorizBlue < 0 || iTryHorizBlue > 63)
-				{
-					continue;
-				}
-
-				encodingTry.m_frgbaColor2.fB = ((iTryHorizBlue << 2) + (iTryHorizBlue >> 4)) / 255.0f;
-
-				for (int iTryVertBlue = iVertBlue - 1; iTryVertBlue <= iVertBlue + 1; iTryVertBlue++)
-				{
-					// check for out of range
-					if (iTryVertBlue < 0 || iTryVertBlue > 63)
-					{
-						continue;
-					}
-
-					// don't bother with null twiddle
-					if (iTryOriginBlue == iOriginBlue && iTryHorizBlue == iHorizBlue && iTryVertBlue == iVertBlue)
-					{
-						continue;
-					}
-
-					encodingTry.m_frgbaColor3.fB = ((iTryVertBlue << 2) + (iTryVertBlue >> 4)) / 255.0f;
-
-					encodingTry.DecodePixels_Planar();
-
-					encodingTry.CalcBlockError();
-
-					if (encodingTry.m_fError < m_fError)
-					{
-						m_mode = MODE_PLANAR;
-						m_boolDiff = true;
-						m_boolFlip = false;
-						m_frgbaColor1 = encodingTry.m_frgbaColor1;
-						m_frgbaColor2 = encodingTry.m_frgbaColor2;
-						m_frgbaColor3 = encodingTry.m_frgbaColor3;
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-						}
-
-						m_fError = encodingTry.m_fError;
-
-						boolImprovement = true;
-					}
-				}
-			}
-		}
-
-		return boolImprovement;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGB8::SetEncodingBits(void)
-	{
-
-		switch (m_mode)
-		{
-		case MODE_ETC1:
-			Block4x4Encoding_ETC1::SetEncodingBits();
-			break;
-
-		case MODE_T:
-			SetEncodingBits_T();
-			break;
-
-		case MODE_H:
-			SetEncodingBits_H();
-			break;
-
-		case MODE_PLANAR:
-			SetEncodingBits_Planar();
-			break;
-
-		default:
-			assert(false);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state for T mode
-	//
-	void Block4x4Encoding_RGB8::SetEncodingBits_T(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_T);
-		assert(m_boolDiff == true);
-
-		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-
-		m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2;
-		m_pencodingbitsRGB8->t.red1b = uiRed1;
-		m_pencodingbitsRGB8->t.green1 = uiGreen1;
-		m_pencodingbitsRGB8->t.blue1 = uiBlue1;
-
-		m_pencodingbitsRGB8->t.red2 = uiRed2;
-		m_pencodingbitsRGB8->t.green2 = uiGreen2;
-		m_pencodingbitsRGB8->t.blue2 = uiBlue2;
-
-		m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1;
-		m_pencodingbitsRGB8->t.db = m_uiCW1;
-
-		m_pencodingbitsRGB8->t.diff = 1;
-
-		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
-
-		// create an invalid R differential to trigger T mode
-		m_pencodingbitsRGB8->t.detect1 = 0;
-		m_pencodingbitsRGB8->t.detect2 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		if (iRed2 >= 4)
-		{
-			m_pencodingbitsRGB8->t.detect1 = 7;
-			m_pencodingbitsRGB8->t.detect2 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->t.detect1 = 0;
-			m_pencodingbitsRGB8->t.detect2 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-
-			// make sure red overflows
-			assert(iRed2 < 0 || iRed2 > 31);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state for H mode
-	//
-	// colors and selectors may need to swap in order to generate lsb of distance index
-	//
-	void Block4x4Encoding_RGB8::SetEncodingBits_H(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_H);
-		assert(m_boolDiff == true);
-
-		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-
-		unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
-		unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;
-
-		bool boolOddDistance = m_uiCW1 & 1;
-		bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance;
-
-		if (boolSwapColors)
-		{
-			m_pencodingbitsRGB8->h.red1 = uiRed2;
-			m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1;
-			m_pencodingbitsRGB8->h.green1b = uiGreen2;
-			m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3;
-			m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1;
-			m_pencodingbitsRGB8->h.blue1c = uiBlue2;
-
-			m_pencodingbitsRGB8->h.red2 = uiRed1;
-			m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1;
-			m_pencodingbitsRGB8->h.green2b = uiGreen1;
-			m_pencodingbitsRGB8->h.blue2 = uiBlue1;
-
-			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
-			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->h.red1 = uiRed1;
-			m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1;
-			m_pencodingbitsRGB8->h.green1b = uiGreen1;
-			m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3;
-			m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1;
-			m_pencodingbitsRGB8->h.blue1c = uiBlue1;
-
-			m_pencodingbitsRGB8->h.red2 = uiRed2;
-			m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1;
-			m_pencodingbitsRGB8->h.green2b = uiGreen2;
-			m_pencodingbitsRGB8->h.blue2 = uiBlue2;
-
-			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
-			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
-		}
-
-		m_pencodingbitsRGB8->h.diff = 1;
-
-		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
-
-		if (boolSwapColors)
-		{
-			m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF;
-		}
-
-		// create an invalid R differential to trigger T mode
-		m_pencodingbitsRGB8->h.detect1 = 0;
-		m_pencodingbitsRGB8->h.detect2 = 0;
-		m_pencodingbitsRGB8->h.detect3 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-		if (iRed2 < 0 || iRed2 > 31)
-		{
-			m_pencodingbitsRGB8->h.detect1 = 1;
-		}
-		if (iGreen2 >= 4)
-		{
-			m_pencodingbitsRGB8->h.detect2 = 7;
-			m_pencodingbitsRGB8->h.detect3 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->h.detect2 = 0;
-			m_pencodingbitsRGB8->h.detect3 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-
-			// make sure red doesn't overflow and green does
-			assert(iRed2 >= 0 && iRed2 <= 31);
-			assert(iGreen2 < 0 || iGreen2 > 31);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state for Planar mode
-	//
-	void Block4x4Encoding_RGB8::SetEncodingBits_Planar(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_PLANAR);
-		assert(m_boolDiff == true);
-
-		unsigned int uiOriginRed = (unsigned int)m_frgbaColor1.IntRed(63.0f);
-		unsigned int uiOriginGreen = (unsigned int)m_frgbaColor1.IntGreen(127.0f);
-		unsigned int uiOriginBlue = (unsigned int)m_frgbaColor1.IntBlue(63.0f);
-
-		unsigned int uiHorizRed = (unsigned int)m_frgbaColor2.IntRed(63.0f);
-		unsigned int uiHorizGreen = (unsigned int)m_frgbaColor2.IntGreen(127.0f);
-		unsigned int uiHorizBlue = (unsigned int)m_frgbaColor2.IntBlue(63.0f);
-
-		unsigned int uiVertRed = (unsigned int)m_frgbaColor3.IntRed(63.0f);
-		unsigned int uiVertGreen = (unsigned int)m_frgbaColor3.IntGreen(127.0f);
-		unsigned int uiVertBlue = (unsigned int)m_frgbaColor3.IntBlue(63.0f);
-
-		m_pencodingbitsRGB8->planar.originRed = uiOriginRed;
-		m_pencodingbitsRGB8->planar.originGreen1 = uiOriginGreen >> 6;
-		m_pencodingbitsRGB8->planar.originGreen2 = uiOriginGreen;
-		m_pencodingbitsRGB8->planar.originBlue1 = uiOriginBlue >> 5;
-		m_pencodingbitsRGB8->planar.originBlue2 = uiOriginBlue >> 3;
-		m_pencodingbitsRGB8->planar.originBlue3 = uiOriginBlue >> 1;
-		m_pencodingbitsRGB8->planar.originBlue4 = uiOriginBlue;
-
-		m_pencodingbitsRGB8->planar.horizRed1 = uiHorizRed >> 1;
-		m_pencodingbitsRGB8->planar.horizRed2 = uiHorizRed;
-		m_pencodingbitsRGB8->planar.horizGreen = uiHorizGreen;
-		m_pencodingbitsRGB8->planar.horizBlue1 = uiHorizBlue >> 5;
-		m_pencodingbitsRGB8->planar.horizBlue2 = uiHorizBlue;
-
-		m_pencodingbitsRGB8->planar.vertRed1 = uiVertRed >> 3;
-		m_pencodingbitsRGB8->planar.vertRed2 = uiVertRed;
-		m_pencodingbitsRGB8->planar.vertGreen1 = uiVertGreen >> 2;
-		m_pencodingbitsRGB8->planar.vertGreen2 = uiVertGreen;
-		m_pencodingbitsRGB8->planar.vertBlue = uiVertBlue;
-
-		m_pencodingbitsRGB8->planar.diff = 1;
-
-		// create valid RG differentials and an invalid B differential to trigger planar mode
-		m_pencodingbitsRGB8->planar.detect1 = 0;
-		m_pencodingbitsRGB8->planar.detect2 = 0;
-		m_pencodingbitsRGB8->planar.detect3 = 0;
-		m_pencodingbitsRGB8->planar.detect4 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-		int iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
-		if (iRed2 < 0 || iRed2 > 31)
-		{
-			m_pencodingbitsRGB8->planar.detect1 = 1;
-		}
-		if (iGreen2 < 0 || iGreen2 > 31)
-		{
-			m_pencodingbitsRGB8->planar.detect2 = 1;
-		}
-		if (iBlue2 >= 4)
-		{
-			m_pencodingbitsRGB8->planar.detect3 = 7;
-			m_pencodingbitsRGB8->planar.detect4 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->planar.detect3 = 0;
-			m_pencodingbitsRGB8->planar.detect4 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-			iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
-
-			// make sure red and green don't overflow and blue does
-			assert(iRed2 >= 0 && iRed2 <= 31);
-			assert(iGreen2 >= 0 && iGreen2 <= 31);
-			assert(iBlue2 < 0 || iBlue2 > 31);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the decoded colors and decoded alpha based on the encoding state for T mode
-	//
-	void Block4x4Encoding_RGB8::DecodePixels_T(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			switch (m_auiSelectors[uiPixel])
-			{
-			case 0:
-				m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1;
-				break;
-
-			case 1:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
-				break;
-
-			case 2:
-				m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2;
-				break;
-
-			case 3:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
-				break;
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the decoded colors and decoded alpha based on the encoding state for H mode
-	//
-	void Block4x4Encoding_RGB8::DecodePixels_H(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			switch (m_auiSelectors[uiPixel])
-			{
-			case 0:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB();
-				break;
-
-			case 1:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB();
-				break;
-
-			case 2:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
-				break;
-
-			case 3:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
-				break;
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the decoded colors and decoded alpha based on the encoding state for Planar mode
-	//
-	void Block4x4Encoding_RGB8::DecodePixels_Planar(void)
-	{
-
-		int iRO = (int)roundf(m_frgbaColor1.fR * 255.0f);
-		int iGO = (int)roundf(m_frgbaColor1.fG * 255.0f);
-		int iBO = (int)roundf(m_frgbaColor1.fB * 255.0f);
-
-		int iRH = (int)roundf(m_frgbaColor2.fR * 255.0f);
-		int iGH = (int)roundf(m_frgbaColor2.fG * 255.0f);
-		int iBH = (int)roundf(m_frgbaColor2.fB * 255.0f);
-
-		int iRV = (int)roundf(m_frgbaColor3.fR * 255.0f);
-		int iGV = (int)roundf(m_frgbaColor3.fG * 255.0f);
-		int iBV = (int)roundf(m_frgbaColor3.fB * 255.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			int iX = (int)(uiPixel >> 2);
-			int iY = (int)(uiPixel & 3);
-
-			int iR = (iX*(iRH - iRO) + iY*(iRV - iRO) + 4*iRO + 2) >> 2;
-			int iG = (iX*(iGH - iGO) + iY*(iGV - iGO) + 4*iGO + 2) >> 2;
-			int iB = (iX*(iBH - iBO) + iY*(iBV - iBO) + 4*iBO + 2) >> 2;
-
-			ColorFloatRGBA frgba;
-			frgba.fR = (float)iR / 255.0f;
-			frgba.fG = (float)iG / 255.0f;
-			frgba.fB = (float)iB / 255.0f;
-			frgba.fA = 1.0f;
-
-			m_afrgbaDecodedColors[uiPixel] = frgba.ClampRGB();
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a linear regression for the a_uiPixels in a_pafrgbaPixels[]
-	//
-	// output the closest color line using a_pfrgbaSlope and a_pfrgbaOffset
-	//
-	void Block4x4Encoding_RGB8::ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels,
-												ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset)
-	{
-		typedef struct
-		{
-			float f[4];
-		} Float4;
-
-		Float4 *paf4Pixels = (Float4 *)(a_pafrgbaPixels);
-		Float4 *pf4Slope = (Float4 *)(a_pfrgbaSlope);
-		Float4 *pf4Offset = (Float4 *)(a_pfrgbaOffset);
-
-		float afX[MAX_PLANAR_REGRESSION_SIZE];
-		float afY[MAX_PLANAR_REGRESSION_SIZE];
-
-		// handle r, g and b separately.  don't bother with a
-		for (unsigned int uiComponent = 0; uiComponent < 3; uiComponent++)
-		{
-			for (unsigned int uiPixel = 0; uiPixel < a_uiPixels; uiPixel++)
-			{
-				afX[uiPixel] = (float)uiPixel;
-				afY[uiPixel] = paf4Pixels[uiPixel].f[uiComponent];
-				
-			}
-			Etc::Regression(afX, afY, a_uiPixels,
-				&(pf4Slope->f[uiComponent]), &(pf4Offset->f[uiComponent]));
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.h
deleted file mode 100644
index 03754d5e3b..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_ETC1.h"
-
-namespace Etc
-{
-
-	class Block4x4Encoding_RGB8 : public Block4x4Encoding_ETC1
-	{
-	public:
-
-		Block4x4Encoding_RGB8(void);
-		virtual ~Block4x4Encoding_RGB8(void);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource,
-
-											ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-		
-		virtual void SetEncodingBits(void);
-
-		inline ColorFloatRGBA GetColor3(void) const
-		{
-			return m_frgbaColor3;
-		}
-
-	protected:
-
-		static const unsigned int PLANAR_CORNER_COLORS = 3;
-		static const unsigned int MAX_PLANAR_REGRESSION_SIZE = 4;
-		static const unsigned int TH_DISTANCES = 8;
-
-		static float s_afTHDistanceTable[TH_DISTANCES];
-
-		void TryPlanar(unsigned int a_uiRadius);
-		void TryTAndH(unsigned int a_uiRadius);
-
-		void InitFromEncodingBits_Planar(void);
-
-		ColorFloatRGBA	m_frgbaColor3;		// used for planar
-
-		void SetEncodingBits_T(void);
-		void SetEncodingBits_H(void);
-		void SetEncodingBits_Planar(void);
-
-		// state shared between iterations
-		ColorFloatRGBA	m_frgbaOriginalColor1_TAndH;
-		ColorFloatRGBA	m_frgbaOriginalColor2_TAndH;
-
-		void CalculateBaseColorsForTAndH(void);
-		void TryT(unsigned int a_uiRadius);
-		void TryT_BestSelectorCombination(void);
-		void TryH(unsigned int a_uiRadius);
-		void TryH_BestSelectorCombination(void);
-
-	private:
-
-		void InitFromEncodingBits_T(void);
-		void InitFromEncodingBits_H(void);
-
-		void CalculatePlanarCornerColors(void);
-
-		void ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels,
-			ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset);
-
-		bool TwiddlePlanar(void);
-		bool TwiddlePlanarR();
-		bool TwiddlePlanarG();
-		bool TwiddlePlanarB();
-
-		void DecodePixels_T(void);
-		void DecodePixels_H(void);
-		void DecodePixels_Planar(void);
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
deleted file mode 100644
index b94b64e68c..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
+++ /dev/null
@@ -1,1819 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_RGB8A1.cpp contains:
-	Block4x4Encoding_RGB8A1
-	Block4x4Encoding_RGB8A1_Opaque
-	Block4x4Encoding_RGB8A1_Transparent
-
-These encoders are used when targetting file format RGB8A1.
-
-Block4x4Encoding_RGB8A1_Opaque is used when all pixels in the 4x4 block are opaque
-Block4x4Encoding_RGB8A1_Transparent is used when all pixels in the 4x4 block are transparent
-Block4x4Encoding_RGB8A1 is used when there is a mixture of alphas in the 4x4 block
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_RGB8A1.h"
-
-#include "EtcBlock4x4.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4Encoding_RGB8.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-namespace Etc
-{
-	
-	// ####################################################################################################
-	// Block4x4Encoding_RGB8A1
-	// ####################################################################################################
-
-	float Block4x4Encoding_RGB8A1::s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS] =
-	{
-		{ 0.0f / 255.0f, 8.0f / 255.0f, 0.0f / 255.0f, -8.0f / 255.0f },
-		{ 0.0f / 255.0f, 17.0f / 255.0f, 0.0f / 255.0f, -17.0f / 255.0f },
-		{ 0.0f / 255.0f, 29.0f / 255.0f, 0.0f / 255.0f, -29.0f / 255.0f },
-		{ 0.0f / 255.0f, 42.0f / 255.0f, 0.0f / 255.0f, -42.0f / 255.0f },
-		{ 0.0f / 255.0f, 60.0f / 255.0f, 0.0f / 255.0f, -60.0f / 255.0f },
-		{ 0.0f / 255.0f, 80.0f / 255.0f, 0.0f / 255.0f, -80.0f / 255.0f },
-		{ 0.0f / 255.0f, 106.0f / 255.0f, 0.0f / 255.0f, -106.0f / 255.0f },
-		{ 0.0f / 255.0f, 183.0f / 255.0f, 0.0f / 255.0f, -183.0f / 255.0f }
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_RGB8A1::Block4x4Encoding_RGB8A1(void)
-	{
-		m_pencodingbitsRGB8 = nullptr;
-		m_boolOpaque = false;
-		m_boolTransparent = false;
-		m_boolPunchThroughPixels = true;
-
-	}
-	Block4x4Encoding_RGB8A1::~Block4x4Encoding_RGB8A1(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_RGB8A1::InitFromSource(Block4x4 *a_pblockParent,
-													ColorFloatRGBA *a_pafrgbaSource,
-													unsigned char *a_paucEncodingBits,
-													ErrorMetric a_errormetric)
-	{
-
-		Block4x4Encoding_RGB8::InitFromSource(a_pblockParent,
-			a_pafrgbaSource,
-			a_paucEncodingBits,
-			a_errormetric);
-
-		m_boolOpaque = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE;
-		m_boolTransparent = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT;
-		m_boolPunchThroughPixels = a_pblockParent->HasPunchThroughPixels();
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			if (m_pafrgbaSource[uiPixel].fA >= 0.5f)
-			{
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-			}
-			else
-			{
-				m_afDecodedAlphas[uiPixel] = 0.0f;
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_RGB8A1::InitFromEncodingBits(Block4x4 *a_pblockParent,
-														unsigned char *a_paucEncodingBits,
-														ColorFloatRGBA *a_pafrgbaSource,
-														ErrorMetric a_errormetric)
-	{
-
-
-		InitFromEncodingBits_ETC1(a_pblockParent,
-			a_paucEncodingBits,
-			a_pafrgbaSource,
-			a_errormetric);
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
-
-		// detect if there is a T, H or Planar mode present
-		int iRed1 = m_pencodingbitsRGB8->differential.red1;
-		int iDRed2 = m_pencodingbitsRGB8->differential.dred2;
-		int iRed2 = iRed1 + iDRed2;
-
-		int iGreen1 = m_pencodingbitsRGB8->differential.green1;
-		int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2;
-		int iGreen2 = iGreen1 + iDGreen2;
-
-		int iBlue1 = m_pencodingbitsRGB8->differential.blue1;
-		int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2;
-		int iBlue2 = iBlue1 + iDBlue2;
-
-		if (iRed2 < 0 || iRed2 > 31)
-		{
-			InitFromEncodingBits_T();
-		}
-		else if (iGreen2 < 0 || iGreen2 > 31)
-		{
-			InitFromEncodingBits_H();
-		}
-		else if (iBlue2 < 0 || iBlue2 > 31)
-		{
-			Block4x4Encoding_RGB8::InitFromEncodingBits_Planar();
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding assuming the encoding is an ETC1 mode.
-	// if it isn't an ETC1 mode, this will be overwritten later
-	//
-	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent,
-		unsigned char *a_paucEncodingBits,
-		ColorFloatRGBA *a_pafrgbaSource,
-		ErrorMetric a_errormetric)
-	{
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,
-			a_errormetric);
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
-
-		m_mode = MODE_ETC1;
-		m_boolDiff = true;
-		m_boolFlip = m_pencodingbitsRGB8->differential.flip;
-		m_boolOpaque = m_pencodingbitsRGB8->differential.diff;
-
-		int iR2 = m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2;
-		if (iR2 < 0)
-		{
-			iR2 = 0;
-		}
-		else if (iR2 > 31)
-		{
-			iR2 = 31;
-		}
-
-		int iG2 = m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2;
-		if (iG2 < 0)
-		{
-			iG2 = 0;
-		}
-		else if (iG2 > 31)
-		{
-			iG2 = 31;
-		}
-
-		int iB2 = m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2;
-		if (iB2 < 0)
-		{
-			iB2 = 0;
-		}
-		else if (iB2 > 31)
-		{
-			iB2 = 31;
-		}
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, (unsigned char)iB2);
-
-		m_uiCW1 = m_pencodingbitsRGB8->differential.cw1;
-		m_uiCW2 = m_pencodingbitsRGB8->differential.cw2;
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		Decode_ETC1();
-
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if T mode is detected
-	//
-	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_T(void)
-	{
-		m_mode = MODE_T;
-
-		unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) +
-								m_pencodingbitsRGB8->t.red1b);
-		unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1;
-		unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1;
-
-		unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2;
-		unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2;
-		unsigned char ucBlue2 = m_pencodingbitsRGB8->t.blue2;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);
-
-		m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db;
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		DecodePixels_T();
-
-		CalcBlockError();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if H mode is detected
-	//
-	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_H(void)
-	{
-		m_mode = MODE_H;
-
-		unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1;
-		unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) +
-									m_pencodingbitsRGB8->h.green1b);
-		unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) +
-								(m_pencodingbitsRGB8->h.blue1b << 1) +
-								m_pencodingbitsRGB8->h.blue1c);
-
-		unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2;
-		unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) +
-									m_pencodingbitsRGB8->h.green2b);
-		unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);
-
-		// used to determine the LSB of the CW
-		unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1);
-		unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2);
-
-		m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1);
-		if (uiRGB1 >= uiRGB2)
-		{
-			m_uiCW1++;
-		}
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		DecodePixels_H();
-
-		CalcBlockError();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// for ETC1 modes, set the decoded colors and decoded alpha based on the encoding state
-	//
-	void Block4x4Encoding_RGB8A1::Decode_ETC1(void)
-	{
-
-		const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0;
-
-		for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++)
-		{
-			ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2;
-			unsigned int uiCW = uiPixelOrder < 8 ? m_uiCW1 : m_uiCW2;
-
-			unsigned int uiPixel = pauiPixelOrder[uiPixelOrder];
-
-			float fDelta;
-			if (m_boolOpaque)
-				fDelta = Block4x4Encoding_ETC1::s_aafCwTable[uiCW][m_auiSelectors[uiPixel]];
-			else 
-				fDelta = s_aafCwOpaqueUnsetTable[uiCW][m_auiSelectors[uiPixel]];
-
-			if (m_boolOpaque == false && m_auiSelectors[uiPixel] == TRANSPARENT_SELECTOR)
-			{
-				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-				m_afDecodedAlphas[uiPixel] = 0.0f;
-			}
-			else
-			{
-				m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// for T mode, set the decoded colors and decoded alpha based on the encoding state
-	//
-	void Block4x4Encoding_RGB8A1::DecodePixels_T(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			switch (m_auiSelectors[uiPixel])
-			{
-			case 0:
-				m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1;
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-
-			case 1:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-
-			case 2:
-				if (m_boolOpaque == false)
-				{
-					m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-					m_afDecodedAlphas[uiPixel] = 0.0f;
-				}
-				else
-				{
-					m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2;
-					m_afDecodedAlphas[uiPixel] = 1.0f;
-				}
-				break;
-
-			case 3:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// for H mode, set the decoded colors and decoded alpha based on the encoding state
-	//
-	void Block4x4Encoding_RGB8A1::DecodePixels_H(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			switch (m_auiSelectors[uiPixel])
-			{
-			case 0:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-
-			case 1:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-
-			case 2:
-				if (m_boolOpaque == false)
-				{
-					m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-					m_afDecodedAlphas[uiPixel] = 0.0f;
-				}
-				else
-				{
-					m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
-					m_afDecodedAlphas[uiPixel] = 1.0f;
-				}
-				break;
-
-			case 3:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-			}
-
-		}
-
-	}
-
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	// RGB8A1 can't use individual mode
-	// RGB8A1 with transparent pixels can't use planar mode
-	//
-	void Block4x4Encoding_RGB8A1::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolOpaque);
-		assert(!m_boolTransparent);
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			PerformFirstIteration();
-			break;
-
-		case 1:
-			TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 2:
-			TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
-			if (a_fEffort <= 39.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 3:
-			Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH();
-			TryT(1);
-			TryH(1);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 4:
-			TryDegenerates1();
-			if (a_fEffort <= 59.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 5:
-			TryDegenerates2();
-			if (a_fEffort <= 69.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 6:
-			TryDegenerates3();
-			if (a_fEffort <= 79.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 7:
-			TryDegenerates4();
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-
-		SetDoneIfPerfect();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best initial encoding to ensure block has a valid encoding
-	//
-	void Block4x4Encoding_RGB8A1::PerformFirstIteration(void)
-	{
-		Block4x4Encoding_ETC1::CalculateMostLikelyFlip();
-
-		m_fError = FLT_MAX;
-
-		TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// mostly copied from ETC1
-	// differences:
-	//		Block4x4Encoding_RGB8A1 encodingTry = *this;
-	//
-	void Block4x4Encoding_RGB8A1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, 
-													int a_iGrayOffset1, int a_iGrayOffset2)
-	{
-
-		ColorFloatRGBA frgbaColor1;
-		ColorFloatRGBA frgbaColor2;
-
-		const unsigned int *pauiPixelMapping1;
-		const unsigned int *pauiPixelMapping2;
-
-		if (a_boolFlip)
-		{
-			frgbaColor1 = m_frgbaSourceAverageTop;
-			frgbaColor2 = m_frgbaSourceAverageBottom;
-
-			pauiPixelMapping1 = s_auiTopPixelMapping;
-			pauiPixelMapping2 = s_auiBottomPixelMapping;
-		}
-		else
-		{
-			frgbaColor1 = m_frgbaSourceAverageLeft;
-			frgbaColor2 = m_frgbaSourceAverageRight;
-
-			pauiPixelMapping1 = s_auiLeftPixelMapping;
-			pauiPixelMapping2 = s_auiRightPixelMapping;
-		}
-
-		DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, 
-								a_uiRadius, a_iGrayOffset1, a_iGrayOffset2);
-
-		Block4x4Encoding_RGB8A1 encodingTry = *this;
-		encodingTry.m_boolFlip = a_boolFlip;
-
-		encodingTry.TryDifferentialHalf(&trys.m_half1);
-		encodingTry.TryDifferentialHalf(&trys.m_half2);
-
-		// find best halves that are within differential range
-		DifferentialTrys::Try *ptryBest1 = nullptr;
-		DifferentialTrys::Try *ptryBest2 = nullptr;
-		encodingTry.m_fError = FLT_MAX;
-
-		// see if the best of each half are in differential range
-		int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed;
-		int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen;
-		int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue;
-		if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3)
-		{
-			ptryBest1 = trys.m_half1.m_ptryBest;
-			ptryBest2 = trys.m_half2.m_ptryBest;
-			encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
-		}
-		else
-		{
-			// else, find the next best halves that are in differential range
-			for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0];
-			ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys];
-				ptry1++)
-			{
-				for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0];
-				ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys];
-					ptry2++)
-				{
-					iDRed = ptry2->m_iRed - ptry1->m_iRed;
-					bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4;
-					iDGreen = ptry2->m_iGreen - ptry1->m_iGreen;
-					bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4;
-					iDBlue = ptry2->m_iBlue - ptry1->m_iBlue;
-					bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4;
-
-					if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta)
-					{
-						float fError = ptry1->m_fError + ptry2->m_fError;
-
-						if (fError < encodingTry.m_fError)
-						{
-							encodingTry.m_fError = fError;
-
-							ptryBest1 = ptry1;
-							ptryBest2 = ptry2;
-						}
-					}
-
-				}
-			}
-			assert(encodingTry.m_fError < FLT_MAX);
-			assert(ptryBest1 != nullptr);
-			assert(ptryBest2 != nullptr);
-		}
-
-		if (encodingTry.m_fError < m_fError)
-		{
-			m_mode = MODE_ETC1;
-			m_boolDiff = true;
-			m_boolFlip = encodingTry.m_boolFlip;
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
-			m_uiCW1 = ptryBest1->m_uiCW;
-			m_uiCW2 = ptryBest2->m_uiCW;
-
-			m_fError = 0.0f;
-			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)
-			{
-				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
-				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
-
-				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
-				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				m_auiSelectors[uiPixel1] = uiSelector1;
-				m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				if (uiSelector1 == TRANSPARENT_SELECTOR)
-				{
-					m_afrgbaDecodedColors[uiPixel1] = ColorFloatRGBA();
-					m_afDecodedAlphas[uiPixel1] = 0.0f;
-				}
-				else
-				{
-					float fDeltaRGB1 = s_aafCwOpaqueUnsetTable[m_uiCW1][uiSelector1];
-					m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
-					m_afDecodedAlphas[uiPixel1] = 1.0f;
-				}
-
-				if (uiSelector2 == TRANSPARENT_SELECTOR)
-				{
-					m_afrgbaDecodedColors[uiPixel2] = ColorFloatRGBA();
-					m_afDecodedAlphas[uiPixel2] = 0.0f;
-				}
-				else
-				{
-					float fDeltaRGB2 = s_aafCwOpaqueUnsetTable[m_uiCW2][uiSelector2];
-					m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
-					m_afDecodedAlphas[uiPixel2] = 1.0f;
-				}
-
-				float fDeltaA1 = m_afDecodedAlphas[uiPixel1] - m_pafrgbaSource[uiPixel1].fA;
-				m_fError += fDeltaA1 * fDeltaA1;
-				float fDeltaA2 = m_afDecodedAlphas[uiPixel2] - m_pafrgbaSource[uiPixel2].fA;
-				m_fError += fDeltaA2 * fDeltaA2;
-			}
-
-			m_fError1 = ptryBest1->m_fError;
-			m_fError2 = ptryBest2->m_fError;
-			m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors;
-			m_fError = m_fError1 + m_fError2;
-
-			// sanity check
-			{
-				int iRed1 = m_frgbaColor1.IntRed(31.0f);
-				int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
-				int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
-
-				int iRed2 = m_frgbaColor2.IntRed(31.0f);
-				int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
-				int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
-
-				iDRed = iRed2 - iRed1;
-				iDGreen = iGreen2 - iGreen1;
-				iDBlue = iBlue2 - iBlue1;
-
-				assert(iDRed >= -4 && iDRed < 4);
-				assert(iDGreen >= -4 && iDGreen < 4);
-				assert(iDBlue >= -4 && iDBlue < 4);
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// mostly copied from ETC1
-	// differences:
-	//		uses s_aafCwOpaqueUnsetTable
-	//		color for selector set to 0,0,0,0
-	//
-	void Block4x4Encoding_RGB8A1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf)
-	{
-
-		a_phalf->m_ptryBest = nullptr;
-		float fBestTryError = FLT_MAX;
-
-		a_phalf->m_uiTrys = 0;
-		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius;
-		iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
-			iRed++)
-		{
-			assert(iRed >= 0 && iRed <= 31);
-
-			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
-			iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
-				iGreen++)
-			{
-				assert(iGreen >= 0 && iGreen <= 31);
-
-				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
-				iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
-					iBlue++)
-				{
-					assert(iBlue >= 0 && iBlue <= 31);
-
-					DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
-					assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]);
-
-					ptry->m_iRed = iRed;
-					ptry->m_iGreen = iGreen;
-					ptry->m_iBlue = iBlue;
-					ptry->m_fError = FLT_MAX;
-					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
-
-					// try each CW
-					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
-					{
-						unsigned int auiPixelSelectors[PIXELS / 2];
-						ColorFloatRGBA	afrgbaDecodedColors[PIXELS / 2];
-						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-							FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-
-						// pre-compute decoded pixels for each selector
-						ColorFloatRGBA afrgbaSelectors[SELECTORS];
-						assert(SELECTORS == 4);
-						afrgbaSelectors[0] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][0]).ClampRGB();
-						afrgbaSelectors[1] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][1]).ClampRGB();
-						afrgbaSelectors[2] = ColorFloatRGBA();
-						afrgbaSelectors[3] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][3]).ClampRGB();
-
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
-							ColorFloatRGBA frgbaDecodedPixel;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								if (pfrgbaSourcePixel->fA < 0.5f)
-								{
-									uiSelector = TRANSPARENT_SELECTOR;
-								}
-								else if (uiSelector == TRANSPARENT_SELECTOR)
-								{
-									continue;
-								}
-
-								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
-
-								float fPixelError;
-								
-								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
-																	*pfrgbaSourcePixel);
-
-								if (fPixelError < afPixelErrors[uiPixel])
-								{
-									auiPixelSelectors[uiPixel] = uiSelector;
-									afrgbaDecodedColors[uiPixel] = frgbaDecodedPixel;
-									afPixelErrors[uiPixel] = fPixelError;
-								}
-
-								if (uiSelector == TRANSPARENT_SELECTOR)
-								{
-									break;
-								}
-							}
-						}
-
-						// add up all pixel errors
-						float fCWError = 0.0f;
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							fCWError += afPixelErrors[uiPixel];
-						}
-
-						// if best CW so far
-						if (fCWError < ptry->m_fError)
-						{
-							ptry->m_uiCW = uiCW;
-							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-							{
-								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
-							}
-							ptry->m_fError = fCWError;
-						}
-
-					}
-
-					if (ptry->m_fError < fBestTryError)
-					{
-						a_phalf->m_ptryBest = ptry;
-						fBestTryError = ptry->m_fError;
-					}
-
-					assert(ptry->m_fError < FLT_MAX);
-
-					a_phalf->m_uiTrys++;
-				}
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in T mode
-	// save this encoding if it improves the error
-	//
-	// since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently
-	// better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower
-	//
-	void Block4x4Encoding_RGB8A1::TryT(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8A1 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_T;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-			encodingTry.m_fError = FLT_MAX;
-		}
-
-		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
-		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
-		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
-
-		int iMinRed1 = iColor1Red - (int)a_uiRadius;
-		if (iMinRed1 < 0)
-		{
-			iMinRed1 = 0;
-		}
-		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
-		if (iMaxRed1 > 15)
-		{
-			iMaxRed1 = 15;
-		}
-
-		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-		if (iMinGreen1 < 0)
-		{
-			iMinGreen1 = 0;
-		}
-		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
-		if (iMaxGreen1 > 15)
-		{
-			iMaxGreen1 = 15;
-		}
-
-		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-		if (iMinBlue1 < 0)
-		{
-			iMinBlue1 = 0;
-		}
-		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
-		if (iMaxBlue1 > 15)
-		{
-			iMaxBlue1 = 15;
-		}
-
-		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
-		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
-
-		int iMinRed2 = iColor2Red - (int)a_uiRadius;
-		if (iMinRed2 < 0)
-		{
-			iMinRed2 = 0;
-		}
-		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
-		if (iMaxRed2 > 15)
-		{
-			iMaxRed2 = 15;
-		}
-
-		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-		if (iMinGreen2 < 0)
-		{
-			iMinGreen2 = 0;
-		}
-		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
-		if (iMaxGreen2 > 15)
-		{
-			iMaxGreen2 = 15;
-		}
-
-		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-		if (iMinBlue2 < 0)
-		{
-			iMinBlue2 = 0;
-		}
-		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
-		if (iMaxBlue2 > 15)
-		{
-			iMaxBlue2 = 15;
-		}
-
-		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-		{
-			encodingTry.m_uiCW1 = uiDistance;
-
-			// twiddle m_frgbaOriginalColor2_TAndH
-			// twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector
-			//
-			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
-			{
-				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
-				{
-					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
-					{
-						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
-						{
-							if (uiBaseColorSwaps == 0)
-							{
-								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
-								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-							}
-							else
-							{
-								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH;
-							}
-
-							encodingTry.TryT_BestSelectorCombination();
-
-							if (encodingTry.m_fError < m_fError)
-							{
-								m_mode = encodingTry.m_mode;
-								m_boolDiff = encodingTry.m_boolDiff;
-								m_boolFlip = encodingTry.m_boolFlip;
-
-								m_frgbaColor1 = encodingTry.m_frgbaColor1;
-								m_frgbaColor2 = encodingTry.m_frgbaColor2;
-								m_uiCW1 = encodingTry.m_uiCW1;
-
-								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-								{
-									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-								}
-
-								m_fError = encodingTry.m_fError;
-							}
-						}
-					}
-				}
-			}
-
-			// twiddle m_frgbaOriginalColor1_TAndH
-			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
-			{
-				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
-				{
-					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
-					{
-						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
-						{
-							if (uiBaseColorSwaps == 0)
-							{
-								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
-							}
-							else
-							{
-								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH;
-								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-							}
-
-							encodingTry.TryT_BestSelectorCombination();
-
-							if (encodingTry.m_fError < m_fError)
-							{
-								m_mode = encodingTry.m_mode;
-								m_boolDiff = encodingTry.m_boolDiff;
-								m_boolFlip = encodingTry.m_boolFlip;
-
-								m_frgbaColor1 = encodingTry.m_frgbaColor1;
-								m_frgbaColor2 = encodingTry.m_frgbaColor2;
-								m_uiCW1 = encodingTry.m_uiCW1;
-
-								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-								{
-									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-								}
-
-								m_fError = encodingTry.m_fError;
-							}
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best selector combination for TryT
-	// called on an encodingTry
-	//
-	void Block4x4Encoding_RGB8A1::TryT_BestSelectorCombination(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-
-		unsigned int auiBestPixelSelectors[PIXELS];
-		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-			FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
-		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
-
-		assert(SELECTORS == 4);
-		afrgbaDecodedPixel[0] = m_frgbaColor1;
-		afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[2] = ColorFloatRGBA();
-		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
-
-		// try each selector
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiMinSelector = 0;
-			unsigned int uiMaxSelector = SELECTORS - 1;
-
-			if (m_pafrgbaSource[uiPixel].fA < 0.5f)
-			{
-				uiMinSelector = 2;
-				uiMaxSelector = 2;
-			}
-
-			for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++)
-			{
-				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
-													m_pafrgbaSource[uiPixel]);
-
-				if (fPixelError < afBestPixelErrors[uiPixel])
-				{
-					afBestPixelErrors[uiPixel] = fPixelError;
-					auiBestPixelSelectors[uiPixel] = uiSelector;
-					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
-				}
-			}
-		}
-		
-
-		// add up all of the pixel errors
-		float fBlockError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			fBlockError += afBestPixelErrors[uiPixel];
-		}
-
-		if (fBlockError < m_fError)
-		{
-			m_fError = fBlockError;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
-				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in H mode
-	// save this encoding if it improves the error
-	//
-	// since all pixels use the distance table, color1 and color2 can NOT be twiddled independently
-	// TWIDDLE_RADIUS of 2 is WAY too slow
-	//
-	void Block4x4Encoding_RGB8A1::TryH(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8A1 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_H;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-			encodingTry.m_fError = FLT_MAX;
-		}
-
-		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
-		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
-		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
-
-		int iMinRed1 = iColor1Red - (int)a_uiRadius;
-		if (iMinRed1 < 0)
-		{
-			iMinRed1 = 0;
-		}
-		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
-		if (iMaxRed1 > 15)
-		{
-			iMaxRed1 = 15;
-		}
-
-		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-		if (iMinGreen1 < 0)
-		{
-			iMinGreen1 = 0;
-		}
-		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
-		if (iMaxGreen1 > 15)
-		{
-			iMaxGreen1 = 15;
-		}
-
-		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-		if (iMinBlue1 < 0)
-		{
-			iMinBlue1 = 0;
-		}
-		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
-		if (iMaxBlue1 > 15)
-		{
-			iMaxBlue1 = 15;
-		}
-
-		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
-		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
-
-		int iMinRed2 = iColor2Red - (int)a_uiRadius;
-		if (iMinRed2 < 0)
-		{
-			iMinRed2 = 0;
-		}
-		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
-		if (iMaxRed2 > 15)
-		{
-			iMaxRed2 = 15;
-		}
-
-		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-		if (iMinGreen2 < 0)
-		{
-			iMinGreen2 = 0;
-		}
-		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
-		if (iMaxGreen2 > 15)
-		{
-			iMaxGreen2 = 15;
-		}
-
-		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-		if (iMinBlue2 < 0)
-		{
-			iMinBlue2 = 0;
-		}
-		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
-		if (iMaxBlue2 > 15)
-		{
-			iMaxBlue2 = 15;
-		}
-
-		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-		{
-			encodingTry.m_uiCW1 = uiDistance;
-
-			// twiddle m_frgbaOriginalColor1_TAndH
-			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
-			{
-				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
-				{
-					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
-					{
-						encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-						encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
-
-						// if color1 == color2, H encoding issues can pop up, so abort
-						if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue)
-						{
-							continue;
-						}
-
-						encodingTry.TryH_BestSelectorCombination();
-
-						if (encodingTry.m_fError < m_fError)
-						{
-							m_mode = encodingTry.m_mode;
-							m_boolDiff = encodingTry.m_boolDiff;
-							m_boolFlip = encodingTry.m_boolFlip;
-
-							m_frgbaColor1 = encodingTry.m_frgbaColor1;
-							m_frgbaColor2 = encodingTry.m_frgbaColor2;
-							m_uiCW1 = encodingTry.m_uiCW1;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-							}
-
-							m_fError = encodingTry.m_fError;
-						}
-					}
-				}
-			}
-
-			// twiddle m_frgbaOriginalColor2_TAndH
-			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
-			{
-				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
-				{
-					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
-					{
-						encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
-						encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-
-						// if color1 == color2, H encoding issues can pop up, so abort
-						if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue)
-						{
-							continue;
-						}
-
-						encodingTry.TryH_BestSelectorCombination();
-
-						if (encodingTry.m_fError < m_fError)
-						{
-							m_mode = encodingTry.m_mode;
-							m_boolDiff = encodingTry.m_boolDiff;
-							m_boolFlip = encodingTry.m_boolFlip;
-
-							m_frgbaColor1 = encodingTry.m_frgbaColor1;
-							m_frgbaColor2 = encodingTry.m_frgbaColor2;
-							m_uiCW1 = encodingTry.m_uiCW1;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-							}
-
-							m_fError = encodingTry.m_fError;
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best selector combination for TryH
-	// called on an encodingTry
-	//
-	void Block4x4Encoding_RGB8A1::TryH_BestSelectorCombination(void)
-	{
-
-		// abort if colors and CW will pose an encoding problem
-		{
-			unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(255.0f);
-			unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(255.0f);
-			unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(255.0f);
-			unsigned int uiColorValue1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
-
-			unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(255.0f);
-			unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(255.0f);
-			unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(255.0f);
-			unsigned int uiColorValue2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;
-
-			unsigned int uiCWLsb = m_uiCW1 & 1;
-
-			if ((uiColorValue1 >= (uiColorValue2 & uiCWLsb)) == 0 ||
-				(uiColorValue1 < (uiColorValue2 & uiCWLsb)) == 1)
-			{
-				return;
-			}
-		}
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-
-		unsigned int auiBestPixelSelectors[PIXELS];
-		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-											FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
-		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
-
-		assert(SELECTORS == 4);
-		afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB();
-		afrgbaDecodedPixel[2] = ColorFloatRGBA();;
-		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
-
-
-		// try each selector
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiMinSelector = 0;
-			unsigned int uiMaxSelector = SELECTORS - 1;
-
-			if (m_pafrgbaSource[uiPixel].fA < 0.5f)
-			{
-				uiMinSelector = 2;
-				uiMaxSelector = 2;
-			}
-
-			for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++)
-			{
-				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
-													m_pafrgbaSource[uiPixel]);
-
-				if (fPixelError < afBestPixelErrors[uiPixel])
-				{
-					afBestPixelErrors[uiPixel] = fPixelError;
-					auiBestPixelSelectors[uiPixel] = uiSelector;
-					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
-				}
-			}
-		}
-		
-
-		// add up all of the pixel errors
-		float fBlockError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			fBlockError += afBestPixelErrors[uiPixel];
-		}
-
-		if (fBlockError < m_fError)
-		{
-			m_fError = fBlockError;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
-				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 1 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_RGB8A1::TryDegenerates1(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, 2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, -2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 2 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_RGB8A1::TryDegenerates2(void)
-	{
-
-		TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 3 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_RGB8A1::TryDegenerates3(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, -2);
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, 2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, -2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, 2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 4 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_RGB8A1::TryDegenerates4(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -4, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 4, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, 4);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, -4);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGB8A1::SetEncodingBits(void)
-	{
-		switch (m_mode)
-		{
-		case MODE_ETC1:
-			SetEncodingBits_ETC1();
-			break;
-
-		case MODE_T:
-			SetEncodingBits_T();
-			break;
-
-		case MODE_H:
-			SetEncodingBits_H();
-			break;
-
-		case MODE_PLANAR:
-			Block4x4Encoding_RGB8::SetEncodingBits_Planar();
-			break;
-
-		default:
-			assert(false);
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state if ETC1 mode
-	//
-	void Block4x4Encoding_RGB8A1::SetEncodingBits_ETC1(void)
-	{
-
-		// there is no individual mode in RGB8A1
-		assert(m_boolDiff);
-
-		int iRed1 = m_frgbaColor1.IntRed(31.0f);
-		int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
-		int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
-
-		int iRed2 = m_frgbaColor2.IntRed(31.0f);
-		int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
-		int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
-
-		int iDRed2 = iRed2 - iRed1;
-		int iDGreen2 = iGreen2 - iGreen1;
-		int iDBlue2 = iBlue2 - iBlue1;
-
-		assert(iDRed2 >= -4 && iDRed2 < 4);
-		assert(iDGreen2 >= -4 && iDGreen2 < 4);
-		assert(iDBlue2 >= -4 && iDBlue2 < 4);
-
-		m_pencodingbitsRGB8->differential.red1 = iRed1;
-		m_pencodingbitsRGB8->differential.green1 = iGreen1;
-		m_pencodingbitsRGB8->differential.blue1 = iBlue1;
-
-		m_pencodingbitsRGB8->differential.dred2 = iDRed2;
-		m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2;
-		m_pencodingbitsRGB8->differential.dblue2 = iDBlue2;
-
-		m_pencodingbitsRGB8->individual.cw1 = m_uiCW1;
-		m_pencodingbitsRGB8->individual.cw2 = m_uiCW2;
-
-		SetEncodingBits_Selectors();
-
-		// in RGB8A1 encoding bits, opaque replaces differential
-		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
-
-		m_pencodingbitsRGB8->individual.flip = m_boolFlip;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state if T mode
-	//
-	void Block4x4Encoding_RGB8A1::SetEncodingBits_T(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_T);
-		assert(m_boolDiff == true);
-
-		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-
-		m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2;
-		m_pencodingbitsRGB8->t.red1b = uiRed1;
-		m_pencodingbitsRGB8->t.green1 = uiGreen1;
-		m_pencodingbitsRGB8->t.blue1 = uiBlue1;
-
-		m_pencodingbitsRGB8->t.red2 = uiRed2;
-		m_pencodingbitsRGB8->t.green2 = uiGreen2;
-		m_pencodingbitsRGB8->t.blue2 = uiBlue2;
-
-		m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1;
-		m_pencodingbitsRGB8->t.db = m_uiCW1;
-
-		// in RGB8A1 encoding bits, opaque replaces differential
-		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
-
-		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
-
-		// create an invalid R differential to trigger T mode
-		m_pencodingbitsRGB8->t.detect1 = 0;
-		m_pencodingbitsRGB8->t.detect2 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		if (iRed2 >= 4)
-		{
-			m_pencodingbitsRGB8->t.detect1 = 7;
-			m_pencodingbitsRGB8->t.detect2 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->t.detect1 = 0;
-			m_pencodingbitsRGB8->t.detect2 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-
-			// make sure red overflows
-			assert(iRed2 < 0 || iRed2 > 31);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state if H mode
-	//
-	// colors and selectors may need to swap in order to generate lsb of distance index
-	//
-	void Block4x4Encoding_RGB8A1::SetEncodingBits_H(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_H);
-		assert(m_boolDiff == true);
-
-		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-
-		unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
-		unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;
-
-		bool boolOddDistance = m_uiCW1 & 1;
-		bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance;
-
-		if (boolSwapColors)
-		{
-			m_pencodingbitsRGB8->h.red1 = uiRed2;
-			m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1;
-			m_pencodingbitsRGB8->h.green1b = uiGreen2;
-			m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3;
-			m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1;
-			m_pencodingbitsRGB8->h.blue1c = uiBlue2;
-
-			m_pencodingbitsRGB8->h.red2 = uiRed1;
-			m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1;
-			m_pencodingbitsRGB8->h.green2b = uiGreen1;
-			m_pencodingbitsRGB8->h.blue2 = uiBlue1;
-
-			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
-			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->h.red1 = uiRed1;
-			m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1;
-			m_pencodingbitsRGB8->h.green1b = uiGreen1;
-			m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3;
-			m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1;
-			m_pencodingbitsRGB8->h.blue1c = uiBlue1;
-
-			m_pencodingbitsRGB8->h.red2 = uiRed2;
-			m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1;
-			m_pencodingbitsRGB8->h.green2b = uiGreen2;
-			m_pencodingbitsRGB8->h.blue2 = uiBlue2;
-
-			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
-			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
-		}
-
-		// in RGB8A1 encoding bits, opaque replaces differential
-		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
-
-		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
-
-		if (boolSwapColors)
-		{
-			m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF;
-		}
-
-		// create an invalid R differential to trigger T mode
-		m_pencodingbitsRGB8->h.detect1 = 0;
-		m_pencodingbitsRGB8->h.detect2 = 0;
-		m_pencodingbitsRGB8->h.detect3 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-		if (iRed2 < 0 || iRed2 > 31)
-		{
-			m_pencodingbitsRGB8->h.detect1 = 1;
-		}
-		if (iGreen2 >= 4)
-		{
-			m_pencodingbitsRGB8->h.detect2 = 7;
-			m_pencodingbitsRGB8->h.detect3 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->h.detect2 = 0;
-			m_pencodingbitsRGB8->h.detect3 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-
-			// make sure red doesn't overflow and green does
-			assert(iRed2 >= 0 && iRed2 <= 31);
-			assert(iGreen2 < 0 || iGreen2 > 31);
-		}
-
-	}
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGB8A1_Opaque
-	// ####################################################################################################
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGB8A1_Opaque::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolPunchThroughPixels);
-		assert(!m_boolTransparent);
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			PerformFirstIteration();
-			break;
-
-		case 1:
-			Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 2:
-			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 3:
-			Block4x4Encoding_RGB8::TryPlanar(1);
-			break;
-
-		case 4:
-			Block4x4Encoding_RGB8::TryTAndH(1);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 5:
-			Block4x4Encoding_ETC1::TryDegenerates1();
-			if (a_fEffort <= 59.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 6:
-			Block4x4Encoding_ETC1::TryDegenerates2();
-			if (a_fEffort <= 69.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 7:
-			Block4x4Encoding_ETC1::TryDegenerates3();
-			if (a_fEffort <= 79.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 8:
-			Block4x4Encoding_ETC1::TryDegenerates4();
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best initial encoding to ensure block has a valid encoding
-	//
-	void Block4x4Encoding_RGB8A1_Opaque::PerformFirstIteration(void)
-	{
-		
-		// set decoded alphas
-		// calculate alpha error
-		m_fError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-
-			float fDeltaA = 1.0f - m_pafrgbaSource[uiPixel].fA;
-			m_fError += fDeltaA * fDeltaA;
-		}
-
-		CalculateMostLikelyFlip();
-
-		m_fError = FLT_MAX;
-
-		Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		Block4x4Encoding_RGB8::TryPlanar(0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		Block4x4Encoding_RGB8::TryTAndH(0);
-		SetDoneIfPerfect();
-	}
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGB8A1_Transparent
-	// ####################################################################################################
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGB8A1_Transparent::PerformIteration(float )
-	{
-		assert(!m_boolOpaque);
-		assert(m_boolTransparent);
-		assert(!m_boolDone);
-		assert(m_uiEncodingIterations == 0);
-
-		m_mode = MODE_ETC1;
-		m_boolDiff = true;
-		m_boolFlip = false;
-
-		m_uiCW1 = 0;
-		m_uiCW2 = 0;
-
-		m_frgbaColor1 = ColorFloatRGBA();
-		m_frgbaColor2 = ColorFloatRGBA();
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_auiSelectors[uiPixel] = TRANSPARENT_SELECTOR;
-
-			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-			m_afDecodedAlphas[uiPixel] = 0.0f;
-		}
-
-		CalcBlockError();
-
-		m_boolDone = true;
-		m_uiEncodingIterations++;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.h
deleted file mode 100644
index ff26e462f8..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_RGB8.h"
-#include "EtcErrorMetric.h"
-#include "EtcBlock4x4EncodingBits.h"
-
-namespace Etc
-{
-
-	// ################################################################################
-	// Block4x4Encoding_RGB8A1
-	// RGB8A1 if not completely opaque or transparent
-	// ################################################################################
-
-	class Block4x4Encoding_RGB8A1 : public Block4x4Encoding_RGB8
-	{
-	public:
-
-		static const unsigned int TRANSPARENT_SELECTOR = 2;
-
-		Block4x4Encoding_RGB8A1(void);
-		virtual ~Block4x4Encoding_RGB8A1(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-									ColorFloatRGBA *a_pafrgbaSource,
-									unsigned char *a_paucEncodingBits,
-									ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource,
-											ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-		void InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent,
-										unsigned char *a_paucEncodingBits,
-										ColorFloatRGBA *a_pafrgbaSource,
-										ErrorMetric a_errormetric);
-
-		void InitFromEncodingBits_T(void);
-		void InitFromEncodingBits_H(void);
-
-		void PerformFirstIteration(void);
-
-		void Decode_ETC1(void);
-		void DecodePixels_T(void);
-		void DecodePixels_H(void);
-		void SetEncodingBits_ETC1(void);
-		void SetEncodingBits_T(void);
-		void SetEncodingBits_H(void);
-
-	protected:
-
-		bool m_boolOpaque;				// all source pixels have alpha >= 0.5
-		bool m_boolTransparent;			// all source pixels have alpha < 0.5
-		bool m_boolPunchThroughPixels;	// some source pixels have alpha < 0.5
-
-		static float s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS];
-
-	private:
-
-		void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
-								int a_iGrayOffset1, int a_iGrayOffset2);
-		void TryDifferentialHalf(DifferentialTrys::Half *a_phalf);
-
-		void TryT(unsigned int a_uiRadius);
-		void TryT_BestSelectorCombination(void);
-		void TryH(unsigned int a_uiRadius);
-		void TryH_BestSelectorCombination(void);
-
-		void TryDegenerates1(void);
-		void TryDegenerates2(void);
-		void TryDegenerates3(void);
-		void TryDegenerates4(void);
-
-	};
-
-	// ################################################################################
-	// Block4x4Encoding_RGB8A1_Opaque
-	// RGB8A1 if all pixels have alpha==1
-	// ################################################################################
-
-	class Block4x4Encoding_RGB8A1_Opaque : public Block4x4Encoding_RGB8A1
-	{
-	public:
-
-		virtual void PerformIteration(float a_fEffort);
-
-		void PerformFirstIteration(void);
-
-	private:
-
-	};
-
-	// ################################################################################
-	// Block4x4Encoding_RGB8A1_Transparent
-	// RGB8A1 if all pixels have alpha==0
-	// ################################################################################
-
-	class Block4x4Encoding_RGB8A1_Transparent : public Block4x4Encoding_RGB8A1
-	{
-	public:
-
-		virtual void PerformIteration(float a_fEffort);
-
-	private:
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp
deleted file mode 100644
index 600c7ab405..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp
+++ /dev/null
@@ -1,474 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_RGBA8.cpp contains:
-	Block4x4Encoding_RGBA8
-	Block4x4Encoding_RGBA8_Opaque
-	Block4x4Encoding_RGBA8_Transparent
-
-These encoders are used when targetting file format RGBA8.
-
-Block4x4Encoding_RGBA8_Opaque is used when all pixels in the 4x4 block are opaque
-Block4x4Encoding_RGBA8_Transparent is used when all pixels in the 4x4 block are transparent
-Block4x4Encoding_RGBA8 is used when there is a mixture of alphas in the 4x4 block
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_RGBA8.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGBA8
-	// ####################################################################################################
-
-	float Block4x4Encoding_RGBA8::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS]
-	{
-		{ -3.0f / 255.0f, -6.0f / 255.0f,  -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f },
-		{ -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 12.0f / 255.0f },
-		{ -2.0f / 255.0f, -4.0f / 255.0f,  -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f },
-
-		{ -3.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f },
-		{ -3.0f / 255.0f, -7.0f / 255.0f,  -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f },
-		{ -4.0f / 255.0f, -7.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
-		{ -3.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
-
-		{ -2.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -4.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
-
-		{ -3.0f / 255.0f, -4.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
-		{ -1.0f / 255.0f, -2.0f / 255.0f,  -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f,  9.0f / 255.0f },
-		{ -4.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f,  -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  8.0f / 255.0f },
-		{ -3.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f,  -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  8.0f / 255.0f }
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_RGBA8::Block4x4Encoding_RGBA8(void)
-	{
-
-		m_pencodingbitsA8 = nullptr;
-
-	}
-	Block4x4Encoding_RGBA8::~Block4x4Encoding_RGBA8(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_RGBA8::InitFromSource(Block4x4 *a_pblockParent,
-												ColorFloatRGBA *a_pafrgbaSource,
-												unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
-	{
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-
-		m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits;
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8));
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_RGBA8::InitFromEncodingBits(Block4x4 *a_pblockParent,
-														unsigned char *a_paucEncodingBits,
-														ColorFloatRGBA *a_pafrgbaSource,
-														ErrorMetric a_errormetric)
-	{
-
-		m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits;
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8));
-
-		// init RGB portion
-		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
-													(unsigned char *) m_pencodingbitsRGB8,
-													a_pafrgbaSource,
-													a_errormetric);
-
-		// init A8 portion
-		// has to be done after InitFromEncodingBits()
-		{
-			m_fBase = m_pencodingbitsA8->data.base / 255.0f;
-			m_fMultiplier = (float)m_pencodingbitsA8->data.multiplier;
-			m_uiModifierTableIndex = m_pencodingbitsA8->data.table;
-
-			unsigned long long int ulliSelectorBits = 0;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors0 << 40;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors1 << 32;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors2 << 24;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors3 << 16;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors4 << 8;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors5;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				unsigned int uiShift = 45 - (3 * uiPixel);
-				m_auiAlphaSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (ALPHA_SELECTORS - 1);
-			}
-
-			// decode the alphas
-			// calc alpha error
-			m_fError = 0.0f;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_afDecodedAlphas[uiPixel] = DecodePixelAlpha(m_fBase, m_fMultiplier,
-					m_uiModifierTableIndex,
-					m_auiAlphaSelectors[uiPixel]);
-
-				float fDeltaAlpha = m_afDecodedAlphas[uiPixel] - m_pafrgbaSource[uiPixel].fA;
-				m_fError += fDeltaAlpha * fDeltaAlpha;
-			}
-		}
-
-		// redo error calc to include alpha
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	// similar to Block4x4Encoding_RGB8_Base::Encode_RGB8(), but with alpha added
-	//
-	void Block4x4Encoding_RGBA8::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		if (m_uiEncodingIterations == 0)
-		{
-			if (a_fEffort < 24.9f)
-			{
-				CalculateA8(0.0f);
-			}
-			else if (a_fEffort < 49.9f)
-			{
-				CalculateA8(1.0f);
-			}
-			else
-			{
-				CalculateA8(2.0f);
-			}
-		}
-
-		Block4x4Encoding_RGB8::PerformIteration(a_fEffort);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find the best combination of base alpga, multiplier and selectors
-	//
-	// a_fRadius limits the range of base alpha to try
-	//
-	void Block4x4Encoding_RGBA8::CalculateA8(float a_fRadius)
-	{
-
-		// find min/max alpha
-		float fMinAlpha = 1.0f;
-		float fMaxAlpha = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			float fAlpha = m_pafrgbaSource[uiPixel].fA;
-
-			// ignore border pixels
-			if (isnan(fAlpha))
-			{
-				continue;
-			}
-
-			if (fAlpha < fMinAlpha)
-			{
-				fMinAlpha = fAlpha;
-			}
-			if (fAlpha > fMaxAlpha)
-			{
-				fMaxAlpha = fAlpha;
-			}
-		}
-		assert(fMinAlpha <= fMaxAlpha);
-
-		float fAlphaRange = fMaxAlpha - fMinAlpha;
-
-		// try each modifier table entry
-		m_fError = FLT_MAX;		// artificially high value
-		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
-		{
-			static const unsigned int MIN_VALUE_SELECTOR = 3;
-			static const unsigned int MAX_VALUE_SELECTOR = 7;
-
-			float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR];
-
-			float fTableEntryRange = s_aafModifierTable[uiTableEntry][MAX_VALUE_SELECTOR] -
-				s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR];
-
-			float fCenterRatio = fTableEntryCenter / fTableEntryRange;
-
-			float fCenter = fMinAlpha + fCenterRatio*fAlphaRange;
-			fCenter = roundf(255.0f * fCenter) / 255.0f;
-
-			float fMinBase = fCenter - (a_fRadius / 255.0f);
-			if (fMinBase < 0.0f)
-			{
-				fMinBase = 0.0f;
-			}
-
-			float fMaxBase = fCenter + (a_fRadius / 255.0f);
-			if (fMaxBase > 1.0f)
-			{
-				fMaxBase = 1.0f;
-			}
-
-			for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
-			{
-
-				float fRangeMultiplier = roundf(fAlphaRange / fTableEntryRange);
-
-				float fMinMultiplier = fRangeMultiplier - a_fRadius;
-				if (fMinMultiplier < 1.0f)
-				{
-					fMinMultiplier = 1.0f;
-				}
-				else if (fMinMultiplier > 15.0f)
-				{
-					fMinMultiplier = 15.0f;
-				}
-
-				float fMaxMultiplier = fRangeMultiplier + a_fRadius;
-				if (fMaxMultiplier < 1.0f)
-				{
-					fMaxMultiplier = 1.0f;
-				}
-				else if (fMaxMultiplier > 15.0f)
-				{
-					fMaxMultiplier = 15.0f;
-				}
-
-				for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
-				{
-					// find best selector for each pixel
-					unsigned int auiBestSelectors[PIXELS];
-					float afBestAlphaError[PIXELS];
-					float afBestDecodedAlphas[PIXELS];
-					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-					{
-						float fBestPixelAlphaError = FLT_MAX;
-						for (unsigned int uiSelector = 0; uiSelector < ALPHA_SELECTORS; uiSelector++)
-						{
-							float fDecodedAlpha = DecodePixelAlpha(fBase, fMultiplier, uiTableEntry, uiSelector);
-
-							// border pixels (NAN) should have zero error
-							float fPixelDeltaAlpha = isnan(m_pafrgbaSource[uiPixel].fA) ?
-															0.0f :
-															fDecodedAlpha - m_pafrgbaSource[uiPixel].fA;
-
-							float fPixelAlphaError = fPixelDeltaAlpha * fPixelDeltaAlpha;
-
-							if (fPixelAlphaError < fBestPixelAlphaError)
-							{
-								fBestPixelAlphaError = fPixelAlphaError;
-								auiBestSelectors[uiPixel] = uiSelector;
-								afBestAlphaError[uiPixel] = fBestPixelAlphaError;
-								afBestDecodedAlphas[uiPixel] = fDecodedAlpha;
-							}
-						}
-					}
-
-					float fBlockError = 0.0f;
-					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-					{
-						fBlockError += afBestAlphaError[uiPixel];
-					}
-
-					if (fBlockError < m_fError)
-					{
-						m_fError = fBlockError;
-
-						m_fBase = fBase;
-						m_fMultiplier = fMultiplier;
-						m_uiModifierTableIndex = uiTableEntry;
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							m_auiAlphaSelectors[uiPixel] = auiBestSelectors[uiPixel];
-							m_afDecodedAlphas[uiPixel] = afBestDecodedAlphas[uiPixel];
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGBA8::SetEncodingBits(void)
-	{
-
-		// set the RGB8 portion
-		Block4x4Encoding_RGB8::SetEncodingBits();
-
-		// set the A8 portion
-		{
-			m_pencodingbitsA8->data.base = (unsigned char)roundf(255.0f * m_fBase);
-			m_pencodingbitsA8->data.table = m_uiModifierTableIndex;
-			m_pencodingbitsA8->data.multiplier = (unsigned char)roundf(m_fMultiplier);
-
-			unsigned long long int ulliSelectorBits = 0;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				unsigned int uiShift = 45 - (3 * uiPixel);
-				ulliSelectorBits |= ((unsigned long long int)m_auiAlphaSelectors[uiPixel]) << uiShift;
-			}
-
-			m_pencodingbitsA8->data.selectors0 = ulliSelectorBits >> 40;
-			m_pencodingbitsA8->data.selectors1 = ulliSelectorBits >> 32;
-			m_pencodingbitsA8->data.selectors2 = ulliSelectorBits >> 24;
-			m_pencodingbitsA8->data.selectors3 = ulliSelectorBits >> 16;
-			m_pencodingbitsA8->data.selectors4 = ulliSelectorBits >> 8;
-			m_pencodingbitsA8->data.selectors5 = ulliSelectorBits;
-		}
-
-	}
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGBA8_Opaque
-	// ####################################################################################################
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGBA8_Opaque::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		if (m_uiEncodingIterations == 0)
-		{
-			m_fError = 0.0f;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-			}
-		}
-
-		Block4x4Encoding_RGB8::PerformIteration(a_fEffort);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGBA8_Opaque::SetEncodingBits(void)
-	{
-
-		// set the RGB8 portion
-		Block4x4Encoding_RGB8::SetEncodingBits();
-
-		// set the A8 portion
-		m_pencodingbitsA8->data.base = 255;
-		m_pencodingbitsA8->data.table = 15;
-		m_pencodingbitsA8->data.multiplier = 15;
-		m_pencodingbitsA8->data.selectors0 = 0xFF;
-		m_pencodingbitsA8->data.selectors1 = 0xFF;
-		m_pencodingbitsA8->data.selectors2 = 0xFF;
-		m_pencodingbitsA8->data.selectors3 = 0xFF;
-		m_pencodingbitsA8->data.selectors4 = 0xFF;
-		m_pencodingbitsA8->data.selectors5 = 0xFF;
-
-	}
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGBA8_Transparent
-	// ####################################################################################################
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGBA8_Transparent::PerformIteration(float )
-	{
-		assert(!m_boolDone);
-		assert(m_uiEncodingIterations == 0);
-
-		m_mode = MODE_ETC1;
-		m_boolDiff = true;
-		m_boolFlip = false;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-			m_afDecodedAlphas[uiPixel] = 0.0f;
-		}
-
-		m_fError = 0.0f;
-
-		m_boolDone = true;
-		m_uiEncodingIterations++;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGBA8_Transparent::SetEncodingBits(void)
-	{
-
-		Block4x4Encoding_RGB8::SetEncodingBits();
-
-		// set the A8 portion
-		m_pencodingbitsA8->data.base = 0;
-		m_pencodingbitsA8->data.table = 0;
-		m_pencodingbitsA8->data.multiplier = 1;
-		m_pencodingbitsA8->data.selectors0 = 0;
-		m_pencodingbitsA8->data.selectors1 = 0;
-		m_pencodingbitsA8->data.selectors2 = 0;
-		m_pencodingbitsA8->data.selectors3 = 0;
-		m_pencodingbitsA8->data.selectors4 = 0;
-		m_pencodingbitsA8->data.selectors5 = 0;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.h
deleted file mode 100644
index 5765d36b90..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_RGB8.h"
-
-namespace Etc
-{
-	class Block4x4EncodingBits_A8;
-
-	// ################################################################################
-	// Block4x4Encoding_RGBA8
-	// RGBA8 if not completely opaque or transparent
-	// ################################################################################
-
-	class Block4x4Encoding_RGBA8 : public Block4x4Encoding_RGB8
-	{
-	public:
-
-		Block4x4Encoding_RGBA8(void);
-		virtual ~Block4x4Encoding_RGBA8(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-									ColorFloatRGBA *a_pafrgbaSource,
-									unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource,
-											ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-	protected:
-
-		static const unsigned int MODIFIER_TABLE_ENTRYS = 16;
-		static const unsigned int ALPHA_SELECTOR_BITS = 3;
-		static const unsigned int ALPHA_SELECTORS = 1 << ALPHA_SELECTOR_BITS;
-
-		static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS];
-
-		void CalculateA8(float a_fRadius);
-
-		Block4x4EncodingBits_A8 *m_pencodingbitsA8;	// A8 portion of Block4x4EncodingBits_RGBA8
-
-		float m_fBase;
-		float m_fMultiplier;
-		unsigned int m_uiModifierTableIndex;
-		unsigned int m_auiAlphaSelectors[PIXELS];
-
-	private:
-
-		inline float DecodePixelAlpha(float a_fBase, float a_fMultiplier,
-										unsigned int a_uiTableIndex, unsigned int a_uiSelector)
-		{
-			float fPixelAlpha = a_fBase + 
-								a_fMultiplier*s_aafModifierTable[a_uiTableIndex][a_uiSelector];
-			if (fPixelAlpha < 0.0f)
-			{
-				fPixelAlpha = 0.0f;
-			}
-			else if (fPixelAlpha > 1.0f)
-			{
-				fPixelAlpha = 1.0f;
-			}
-
-			return fPixelAlpha;
-		}
-
-	};
-
-	// ################################################################################
-	// Block4x4Encoding_RGBA8_Opaque
-	// RGBA8 if all pixels have alpha==1
-	// ################################################################################
-
-	class Block4x4Encoding_RGBA8_Opaque : public Block4x4Encoding_RGBA8
-	{
-	public:
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-	};
-
-	// ################################################################################
-	// Block4x4Encoding_RGBA8_Transparent
-	// RGBA8 if all pixels have alpha==0
-	// ################################################################################
-
-	class Block4x4Encoding_RGBA8_Transparent : public Block4x4Encoding_RGBA8
-	{
-	public:
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcColor.h b/thirdparty/etc2comp/EtcColor.h
deleted file mode 100644
index 7ceae05b65..0000000000
--- a/thirdparty/etc2comp/EtcColor.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <math.h>
-
-namespace Etc
-{
-
-	inline float LogToLinear(float a_fLog)
-	{
-		static const float ALPHA = 0.055f;
-		static const float ONE_PLUS_ALPHA = 1.0f + ALPHA;
-
-		if (a_fLog <= 0.04045f)
-		{
-			return a_fLog / 12.92f;
-		}
-		else
-		{
-			return powf((a_fLog + ALPHA) / ONE_PLUS_ALPHA, 2.4f);
-		}
-	}
-
-	inline float LinearToLog(float &a_fLinear)
-	{
-		static const float ALPHA = 0.055f;
-		static const float ONE_PLUS_ALPHA = 1.0f + ALPHA;
-
-		if (a_fLinear <= 0.0031308f)
-		{
-			return 12.92f * a_fLinear;
-		}
-		else
-		{
-			return ONE_PLUS_ALPHA * powf(a_fLinear, (1.0f/2.4f)) - ALPHA;
-		}
-	}
-
-	class ColorR8G8B8A8
-	{
-	public:
-
-		unsigned char ucR;
-		unsigned char ucG;
-		unsigned char ucB;
-		unsigned char ucA;
-
-	};
-}
diff --git a/thirdparty/etc2comp/EtcColorFloatRGBA.h b/thirdparty/etc2comp/EtcColorFloatRGBA.h
deleted file mode 100644
index f2ca2c1f71..0000000000
--- a/thirdparty/etc2comp/EtcColorFloatRGBA.h
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcConfig.h"
-#include "EtcColor.h"
-
-#include <math.h>
-
-namespace Etc
-{
-
-	class ColorFloatRGBA
-    {
-    public:
-
-		ColorFloatRGBA(void)
-        {
-            fR = fG = fB = fA = 0.0f;
-        }
-
-		ColorFloatRGBA(float a_fR, float a_fG, float a_fB, float a_fA)
-        {
-            fR = a_fR;
-            fG = a_fG;
-            fB = a_fB;
-            fA = a_fA;
-        }
-
-		inline ColorFloatRGBA operator+(ColorFloatRGBA& a_rfrgba)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR + a_rfrgba.fR;
-			frgba.fG = fG + a_rfrgba.fG;
-			frgba.fB = fB + a_rfrgba.fB;
-			frgba.fA = fA + a_rfrgba.fA;
-			return frgba;
-		}
-
-		inline ColorFloatRGBA operator+(float a_f)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR + a_f;
-			frgba.fG = fG + a_f;
-			frgba.fB = fB + a_f;
-			frgba.fA = fA;
-			return frgba;
-		}
-
-		inline ColorFloatRGBA operator-(float a_f)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR - a_f;
-			frgba.fG = fG - a_f;
-			frgba.fB = fB - a_f;
-			frgba.fA = fA;
-			return frgba;
-		}
-
-		inline ColorFloatRGBA operator-(ColorFloatRGBA& a_rfrgba)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR - a_rfrgba.fR;
-			frgba.fG = fG - a_rfrgba.fG;
-			frgba.fB = fB - a_rfrgba.fB;
-			frgba.fA = fA - a_rfrgba.fA;
-			return frgba;
-		}
-
-		inline ColorFloatRGBA operator*(float a_f)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR * a_f;
-			frgba.fG = fG * a_f;
-			frgba.fB = fB * a_f;
-			frgba.fA = fA;
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA ScaleRGB(float a_f)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = a_f * fR;
-			frgba.fG = a_f * fG;
-			frgba.fB = a_f * fB;
-			frgba.fA = fA;
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA RoundRGB(void)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = roundf(fR);
-			frgba.fG = roundf(fG);
-			frgba.fB = roundf(fB);
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA ToLinear()
-		{
-			ColorFloatRGBA frgbaLinear;
-			frgbaLinear.fR = LogToLinear(fR);
-			frgbaLinear.fG = LogToLinear(fG);
-			frgbaLinear.fB = LogToLinear(fB);
-			frgbaLinear.fA = fA;
-
-			return frgbaLinear;
-		}
-
-		inline ColorFloatRGBA ToLog(void)
-		{
-			ColorFloatRGBA frgbaLog;
-			frgbaLog.fR = LinearToLog(fR);
-			frgbaLog.fG = LinearToLog(fG);
-			frgbaLog.fB = LinearToLog(fB);
-			frgbaLog.fA = fA;
-
-			return frgbaLog;
-		}
-
-		inline static ColorFloatRGBA ConvertFromRGBA8(unsigned char a_ucR, 
-			unsigned char a_ucG, unsigned char a_ucB, unsigned char a_ucA)
-		{
-			ColorFloatRGBA frgba;
-
-			frgba.fR = (float)a_ucR / 255.0f;
-			frgba.fG = (float)a_ucG / 255.0f;
-			frgba.fB = (float)a_ucB / 255.0f;
-			frgba.fA = (float)a_ucA / 255.0f;
-
-			return frgba;
-		}
-
-		inline static ColorFloatRGBA ConvertFromRGB4(unsigned char a_ucR4,
-														unsigned char a_ucG4,
-														unsigned char a_ucB4)
-		{
-			ColorFloatRGBA frgba;
-
-			unsigned char ucR8 = (unsigned char)((a_ucR4 << 4) + a_ucR4);
-			unsigned char ucG8 = (unsigned char)((a_ucG4 << 4) + a_ucG4);
-			unsigned char ucB8 = (unsigned char)((a_ucB4 << 4) + a_ucB4);
-
-			frgba.fR = (float)ucR8 / 255.0f;
-			frgba.fG = (float)ucG8 / 255.0f;
-			frgba.fB = (float)ucB8 / 255.0f;
-			frgba.fA = 1.0f;
-
-			return frgba;
-		}
-
-		inline static ColorFloatRGBA ConvertFromRGB5(unsigned char a_ucR5,
-			unsigned char a_ucG5,
-			unsigned char a_ucB5)
-		{
-			ColorFloatRGBA frgba;
-
-			unsigned char ucR8 = (unsigned char)((a_ucR5 << 3) + (a_ucR5 >> 2));
-			unsigned char ucG8 = (unsigned char)((a_ucG5 << 3) + (a_ucG5 >> 2));
-			unsigned char ucB8 = (unsigned char)((a_ucB5 << 3) + (a_ucB5 >> 2));
-
-			frgba.fR = (float)ucR8 / 255.0f;
-			frgba.fG = (float)ucG8 / 255.0f;
-			frgba.fB = (float)ucB8 / 255.0f;
-			frgba.fA = 1.0f;
-
-			return frgba;
-		}
-
-		inline static ColorFloatRGBA ConvertFromR6G7B6(unsigned char a_ucR6,
-			unsigned char a_ucG7,
-			unsigned char a_ucB6)
-		{
-			ColorFloatRGBA frgba;
-
-			unsigned char ucR8 = (unsigned char)((a_ucR6 << 2) + (a_ucR6 >> 4));
-			unsigned char ucG8 = (unsigned char)((a_ucG7 << 1) + (a_ucG7 >> 6));
-			unsigned char ucB8 = (unsigned char)((a_ucB6 << 2) + (a_ucB6 >> 4));
-
-			frgba.fR = (float)ucR8 / 255.0f;
-			frgba.fG = (float)ucG8 / 255.0f;
-			frgba.fB = (float)ucB8 / 255.0f;
-			frgba.fA = 1.0f;
-
-			return frgba;
-		}
-
-		// quantize to 4 bits, expand to 8 bits
-		inline ColorFloatRGBA QuantizeR4G4B4(void) const
-		{
-			ColorFloatRGBA frgba = *this;
-
-			// quantize to 4 bits
-			frgba = frgba.ClampRGB().ScaleRGB(15.0f).RoundRGB();
-			unsigned int uiR4 = (unsigned int)frgba.fR;
-			unsigned int uiG4 = (unsigned int)frgba.fG;
-			unsigned int uiB4 = (unsigned int)frgba.fB;
-
-			// expand to 8 bits
-			frgba.fR = (float) ((uiR4 << 4) + uiR4);
-			frgba.fG = (float) ((uiG4 << 4) + uiG4);
-			frgba.fB = (float) ((uiB4 << 4) + uiB4);
-
-			frgba = frgba.ScaleRGB(1.0f/255.0f);
-
-			return frgba;
-		}
-
-		// quantize to 5 bits, expand to 8 bits
-		inline ColorFloatRGBA QuantizeR5G5B5(void) const
-		{
-			ColorFloatRGBA frgba = *this;
-
-			// quantize to 5 bits
-			frgba = frgba.ClampRGB().ScaleRGB(31.0f).RoundRGB();
-			unsigned int uiR5 = (unsigned int)frgba.fR;
-			unsigned int uiG5 = (unsigned int)frgba.fG;
-			unsigned int uiB5 = (unsigned int)frgba.fB;
-
-			// expand to 8 bits
-			frgba.fR = (float)((uiR5 << 3) + (uiR5 >> 2));
-			frgba.fG = (float)((uiG5 << 3) + (uiG5 >> 2));
-			frgba.fB = (float)((uiB5 << 3) + (uiB5 >> 2));
-
-			frgba = frgba.ScaleRGB(1.0f / 255.0f);
-
-			return frgba;
-		}
-
-		// quantize to 6/7/6 bits, expand to 8 bits
-		inline ColorFloatRGBA QuantizeR6G7B6(void) const
-		{
-			ColorFloatRGBA frgba = *this;
-
-			// quantize to 6/7/6 bits
-			ColorFloatRGBA frgba6 = frgba.ClampRGB().ScaleRGB(63.0f).RoundRGB();
-			ColorFloatRGBA frgba7 = frgba.ClampRGB().ScaleRGB(127.0f).RoundRGB();
-			unsigned int uiR6 = (unsigned int)frgba6.fR;
-			unsigned int uiG7 = (unsigned int)frgba7.fG;
-			unsigned int uiB6 = (unsigned int)frgba6.fB;
-
-			// expand to 8 bits
-			frgba.fR = (float)((uiR6 << 2) + (uiR6 >> 4));
-			frgba.fG = (float)((uiG7 << 1) + (uiG7 >> 6));
-			frgba.fB = (float)((uiB6 << 2) + (uiB6 >> 4));
-
-			frgba = frgba.ScaleRGB(1.0f / 255.0f);
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA ClampRGB(void)
-		{
-			ColorFloatRGBA frgba = *this;
-			if (frgba.fR < 0.0f) { frgba.fR = 0.0f; }
-			if (frgba.fR > 1.0f) { frgba.fR = 1.0f; }
-			if (frgba.fG < 0.0f) { frgba.fG = 0.0f; }
-			if (frgba.fG > 1.0f) { frgba.fG = 1.0f; }
-			if (frgba.fB < 0.0f) { frgba.fB = 0.0f; }
-			if (frgba.fB > 1.0f) { frgba.fB = 1.0f; }
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA ClampRGBA(void)
-		{
-			ColorFloatRGBA frgba = *this;
-			if (frgba.fR < 0.0f) { frgba.fR = 0.0f; }
-			if (frgba.fR > 1.0f) { frgba.fR = 1.0f; }
-			if (frgba.fG < 0.0f) { frgba.fG = 0.0f; }
-			if (frgba.fG > 1.0f) { frgba.fG = 1.0f; }
-			if (frgba.fB < 0.0f) { frgba.fB = 0.0f; }
-			if (frgba.fB > 1.0f) { frgba.fB = 1.0f; }
-			if (frgba.fA < 0.0f) { frgba.fA = 0.0f; }
-			if (frgba.fA > 1.0f) { frgba.fA = 1.0f; }
-
-			return frgba;
-		}
-
-		inline int IntRed(float a_fScale)
-		{
-			return (int)roundf(fR * a_fScale);
-		}
-
-		inline int IntGreen(float a_fScale)
-		{
-			return (int)roundf(fG * a_fScale);
-		}
-
-		inline int IntBlue(float a_fScale)
-		{
-			return (int)roundf(fB * a_fScale);
-		}
-
-		inline int IntAlpha(float a_fScale)
-		{
-			return (int)roundf(fA * a_fScale);
-		}
-
-		float	fR, fG, fB, fA;
-    };
-
-}
-
diff --git a/thirdparty/etc2comp/EtcConfig.h b/thirdparty/etc2comp/EtcConfig.h
deleted file mode 100644
index 3bfe1d99a8..0000000000
--- a/thirdparty/etc2comp/EtcConfig.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#ifdef _WIN32
-#define ETC_WINDOWS (1)
-#else
-#define ETC_WINDOWS (0)
-#endif
-
-#if __APPLE__
-#define ETC_OSX (1)
-#else
-#define ETC_OSX (0)
-#endif
-
-#if __unix__
-#define ETC_UNIX (1)
-#else
-#define ETC_UNIX (0)
-#endif
-
-
-// short names for common types
-#include <stdint.h>
-typedef int8_t i8;
-typedef int16_t i16;
-typedef int32_t i32;
-typedef int64_t i64;
-
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
-
-typedef float	f32;
-typedef double	f64;
-
-// Keep asserts enabled in release builds during development
-#undef NDEBUG
-
-// 0=disable. stb_image can be used if you need to compress
-//other image formats like jpg
-#define USE_STB_IMAGE_LOAD 0
-
-#if ETC_WINDOWS
-#include <sdkddkver.h>
-#define _CRT_SECURE_NO_WARNINGS (1)
-#include <tchar.h>
-#endif
-
-#include <stdio.h>
-
diff --git a/thirdparty/etc2comp/EtcDifferentialTrys.cpp b/thirdparty/etc2comp/EtcDifferentialTrys.cpp
deleted file mode 100644
index ef4cd103d9..0000000000
--- a/thirdparty/etc2comp/EtcDifferentialTrys.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcDifferentialTrys.cpp
-
-Gathers the results of the various encoding trys for both halves of a 4x4 block for Differential mode
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcDifferentialTrys.h"
-
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// construct a list of trys (encoding attempts)
-	//
-	// a_frgbaColor1 is the basecolor for the first half
-	// a_frgbaColor2 is the basecolor for the second half
-	// a_pauiPixelMapping1 is the pixel order for the first half
-	// a_pauiPixelMapping2 is the pixel order for the second half
-	// a_uiRadius is the amount to vary the base colors
-	//
-	DifferentialTrys::DifferentialTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2,
-										const unsigned int *a_pauiPixelMapping1,
-										const unsigned int *a_pauiPixelMapping2,
-										unsigned int a_uiRadius,
-										int a_iGrayOffset1, int a_iGrayOffset2)
-	{
-		assert(a_uiRadius <= MAX_RADIUS);
-
-		m_boolSeverelyBentColors = false;
-
-		ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR5G5B5();
-		ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR5G5B5();
-
-		// quantize base colors
-		// ensure that trys with a_uiRadius don't overflow
-		int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(31.0f)+a_iGrayOffset1, a_uiRadius);
-		int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(31.0f) + a_iGrayOffset1, a_uiRadius);
-		int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(31.0f) + a_iGrayOffset1, a_uiRadius);
-		int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(31.0f) + a_iGrayOffset2, a_uiRadius);
-		int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(31.0f) + a_iGrayOffset2, a_uiRadius);
-		int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(31.0f) + a_iGrayOffset2, a_uiRadius);
-
-		int iDeltaRed = iRed2 - iRed1;
-		int iDeltaGreen = iGreen2 - iGreen1;
-		int iDeltaBlue = iBlue2 - iBlue1;
-
-		// make sure components are within range
-		{
-			if (iDeltaRed > 3)
-			{
-				if (iDeltaRed > 7)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iRed1 += (iDeltaRed - 3) / 2;
-				iRed2 = iRed1 + 3;
-				iDeltaRed = 3;
-			}
-			else if (iDeltaRed < -4)
-			{
-				if (iDeltaRed < -8)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iRed1 += (iDeltaRed + 4) / 2;
-				iRed2 = iRed1 - 4;
-				iDeltaRed = -4;
-			}
-			assert(iRed1 >= (signed)(0 + a_uiRadius) && iRed1 <= (signed)(31 - a_uiRadius));
-			assert(iRed2 >= (signed)(0 + a_uiRadius) && iRed2 <= (signed)(31 - a_uiRadius));
-			assert(iDeltaRed >= -4 && iDeltaRed <= 3);
-
-			if (iDeltaGreen > 3)
-			{
-				if (iDeltaGreen > 7)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iGreen1 += (iDeltaGreen - 3) / 2;
-				iGreen2 = iGreen1 + 3;
-				iDeltaGreen = 3;
-			}
-			else if (iDeltaGreen < -4)
-			{
-				if (iDeltaGreen < -8)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iGreen1 += (iDeltaGreen + 4) / 2;
-				iGreen2 = iGreen1 - 4;
-				iDeltaGreen = -4;
-			}
-			assert(iGreen1 >= (signed)(0 + a_uiRadius) && iGreen1 <= (signed)(31 - a_uiRadius));
-			assert(iGreen2 >= (signed)(0 + a_uiRadius) && iGreen2 <= (signed)(31 - a_uiRadius));
-			assert(iDeltaGreen >= -4 && iDeltaGreen <= 3);
-
-			if (iDeltaBlue > 3)
-			{
-				if (iDeltaBlue > 7)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iBlue1 += (iDeltaBlue - 3) / 2;
-				iBlue2 = iBlue1 + 3;
-				iDeltaBlue = 3;
-			}
-			else if (iDeltaBlue < -4)
-			{
-				if (iDeltaBlue < -8)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iBlue1 += (iDeltaBlue + 4) / 2;
-				iBlue2 = iBlue1 - 4;
-				iDeltaBlue = -4;
-			}
-			assert(iBlue1 >= (signed)(0+a_uiRadius) && iBlue1 <= (signed)(31 - a_uiRadius));
-			assert(iBlue2 >= (signed)(0 + a_uiRadius) && iBlue2 <= (signed)(31 - a_uiRadius));
-			assert(iDeltaBlue >= -4 && iDeltaBlue <= 3);
-		}
-
-		m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius);
-		m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	void DifferentialTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue, 
-										const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius)
-	{
-
-		m_iRed = a_iRed;
-		m_iGreen = a_iGreen;
-		m_iBlue = a_iBlue;
-
-		m_pauiPixelMapping = a_pauiPixelMapping;
-		m_uiRadius = a_uiRadius;
-
-		m_uiTrys = 0;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcDifferentialTrys.h b/thirdparty/etc2comp/EtcDifferentialTrys.h
deleted file mode 100644
index 71860908ff..0000000000
--- a/thirdparty/etc2comp/EtcDifferentialTrys.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColorFloatRGBA.h"
-
-namespace Etc
-{
-
-	class DifferentialTrys
-	{
-	public:
-
-		static const unsigned int MAX_RADIUS = 2;
-
-		DifferentialTrys(ColorFloatRGBA a_frgbaColor1,
-							ColorFloatRGBA a_frgbaColor2,
-							const unsigned int *a_pauiPixelMapping1,
-							const unsigned int *a_pauiPixelMapping2,
-							unsigned int a_uiRadius,
-							int a_iGrayOffset1, int a_iGrayOffset2);
-
-		inline static int MoveAwayFromEdge(int a_i, int a_iDistance)
-		{
-			if (a_i < (0+ a_iDistance))
-			{
-				return (0 + a_iDistance);
-			}
-			else if (a_i > (31- a_iDistance))
-			{
-				return (31 - a_iDistance);
-			}
-
-			return a_i;
-		}
-
-		class Try
-		{
-        public :
-			static const unsigned int SELECTORS = 8;	// per half
-
-			int m_iRed;
-			int m_iGreen;
-			int m_iBlue;
-			unsigned int m_uiCW;
-			unsigned int m_auiSelectors[SELECTORS];
-			float m_fError;
-        };
-
-		class Half
-		{
-		public:
-
-			static const unsigned int MAX_TRYS = 125;
-
-			void Init(int a_iRed, int a_iGreen, int a_iBlue, 
-						const unsigned int *a_pauiPixelMapping,
-						unsigned int a_uiRadius);
-
-			// center of trys
-			int m_iRed;
-			int m_iGreen;
-			int m_iBlue;
-
-			const unsigned int *m_pauiPixelMapping;
-			unsigned int m_uiRadius;
-
-			unsigned int m_uiTrys;
-			Try m_atry[MAX_TRYS];
-
-			Try *m_ptryBest;
-		};
-
-		Half m_half1;
-		Half m_half2;
-
-		bool m_boolSeverelyBentColors;
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcErrorMetric.h b/thirdparty/etc2comp/EtcErrorMetric.h
deleted file mode 100644
index df4dcab4fb..0000000000
--- a/thirdparty/etc2comp/EtcErrorMetric.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-namespace Etc
-{
-
-	enum ErrorMetric
-	{
-		RGBA,
-		RGBX,
-		REC709,
-		NUMERIC,
-		NORMALXYZ,
-		//
-		ERROR_METRICS,
-		//
-		BT709 = REC709
-	};
-
-	inline const char *ErrorMetricToString(ErrorMetric errorMetric)
-	{
-		switch (errorMetric)
-		{
-		case RGBA:
-			return "RGBA";
-		case RGBX:
-			return "RGBX";
-		case REC709:
-			return "REC709";
-		case NUMERIC:
-			return "NUMERIC";
-		case NORMALXYZ:
-			return "NORMALXYZ";
-		case ERROR_METRICS:
-		default:
-			return "UNKNOWN";
-		}
-	}
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcFile.cpp b/thirdparty/etc2comp/EtcFile.cpp
deleted file mode 100644
index 831a3aac45..0000000000
--- a/thirdparty/etc2comp/EtcFile.cpp
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef _WIN32
-#define _CRT_SECURE_NO_WARNINGS (1)
-#endif
-
-#include "EtcConfig.h"
-
-
-#include "EtcFile.h"
-
-#include "EtcFileHeader.h"
-#include "EtcColor.h"
-#include "Etc.h"
-#include "EtcBlock4x4EncodingBits.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <stdlib.h>
-
-using namespace Etc;
-
-// ----------------------------------------------------------------------------------------------------
-//
-File::File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
-			unsigned char *a_paucEncodingBits, unsigned int a_uiEncodingBitsBytes,
-			unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
-			unsigned int a_uiExtendedWidth, unsigned int a_uiExtendedHeight)
-{
-	if (a_pstrFilename == nullptr)
-	{
-		m_pstrFilename = const_cast<char *>("");
-	}
-	else
-	{
-		m_pstrFilename = new char[strlen(a_pstrFilename) + 1];
-		strcpy(m_pstrFilename, a_pstrFilename);
-	}
-
-	m_fileformat = a_fileformat;
-	if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION)
-	{
-		// ***** TODO: add this later *****
-		m_fileformat = Format::KTX;
-	}
-
-	m_imageformat = a_imageformat;
-
-	m_uiNumMipmaps = 1;
-	m_pMipmapImages = new RawImage[m_uiNumMipmaps];
-	m_pMipmapImages[0].paucEncodingBits = std::shared_ptr<unsigned char>(a_paucEncodingBits, [](unsigned char *p) { delete[] p; } );
-	m_pMipmapImages[0].uiEncodingBitsBytes = a_uiEncodingBitsBytes;
-	m_pMipmapImages[0].uiExtendedWidth = a_uiExtendedWidth;
-	m_pMipmapImages[0].uiExtendedHeight = a_uiExtendedHeight;
-
-	m_uiSourceWidth = a_uiSourceWidth;
-	m_uiSourceHeight = a_uiSourceHeight;
-
-	switch (m_fileformat)
-	{
-	case Format::PKM:
-		m_pheader = new FileHeader_Pkm(this);
-		break;
-
-	case Format::KTX:
-		m_pheader = new FileHeader_Ktx(this);
-		break;
-
-	default:
-		assert(0);
-		break;
-	}
-
-}
-
-// ----------------------------------------------------------------------------------------------------
-//
-File::File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
-	unsigned int a_uiNumMipmaps, RawImage *a_pMipmapImages,
-	unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight)
-{
-	if (a_pstrFilename == nullptr)
-	{
-		m_pstrFilename = const_cast<char *>("");
-	}
-	else
-	{
-		m_pstrFilename = new char[strlen(a_pstrFilename) + 1];
-		strcpy(m_pstrFilename, a_pstrFilename);
-	}
-
-	m_fileformat = a_fileformat;
-	if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION)
-	{
-		// ***** TODO: add this later *****
-		m_fileformat = Format::KTX;
-	}
-
-	m_imageformat = a_imageformat;
-
-	m_uiNumMipmaps = a_uiNumMipmaps;
-	m_pMipmapImages = new RawImage[m_uiNumMipmaps];
-
-	for(unsigned int mip = 0; mip < m_uiNumMipmaps; mip++)
-	{
-		m_pMipmapImages[mip] = a_pMipmapImages[mip];
-	}
-
-	m_uiSourceWidth = a_uiSourceWidth;
-	m_uiSourceHeight = a_uiSourceHeight;
-
-	switch (m_fileformat)
-	{
-	case Format::PKM:
-		m_pheader = new FileHeader_Pkm(this);
-		break;
-
-	case Format::KTX:
-		m_pheader = new FileHeader_Ktx(this);
-		break;
-
-	default:
-		assert(0);
-		break;
-	}
-
-}
-
-// ----------------------------------------------------------------------------------------------------
-//
-File::File(const char *a_pstrFilename, Format a_fileformat)
-{
-	if (a_pstrFilename == nullptr)
-	{
-		return;
-	}
-	else
-	{
-		m_pstrFilename = new char[strlen(a_pstrFilename) + 1];
-		strcpy(m_pstrFilename, a_pstrFilename);
-	}
-
-	m_fileformat = a_fileformat;
-	if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION)
-	{
-		// ***** TODO: add this later *****
-		m_fileformat = Format::KTX;
-	}
-
-	FILE *pfile = fopen(m_pstrFilename, "rb");
-	if (pfile == nullptr)
-	{
-		printf("ERROR: Couldn't open %s", m_pstrFilename);
-		exit(1);
-	}
-	fseek(pfile, 0, SEEK_END);
-	unsigned int fileSize = ftell(pfile);
-	fseek(pfile, 0, SEEK_SET);
-	size_t szResult;
-
-	m_pheader = new FileHeader_Ktx(this);
-	szResult = fread( ((FileHeader_Ktx*)m_pheader)->GetData(), 1, sizeof(FileHeader_Ktx::Data), pfile);
-	assert(szResult > 0);
-
-	m_uiNumMipmaps = 1;
-	m_pMipmapImages = new RawImage[m_uiNumMipmaps];
-
-	if (((FileHeader_Ktx*)m_pheader)->GetData()->m_u32BytesOfKeyValueData > 0)
-		fseek(pfile, ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32BytesOfKeyValueData, SEEK_CUR);
-	szResult = fread(&m_pMipmapImages->uiEncodingBitsBytes, 1, sizeof(unsigned int), pfile);
-	assert(szResult > 0);
-
-	m_pMipmapImages->paucEncodingBits = std::shared_ptr<unsigned char>(new unsigned char[m_pMipmapImages->uiEncodingBitsBytes], [](unsigned char *p) { delete[] p; } );
-	assert(ftell(pfile) + m_pMipmapImages->uiEncodingBitsBytes <= fileSize);
-	szResult = fread(m_pMipmapImages->paucEncodingBits.get(), 1, m_pMipmapImages->uiEncodingBitsBytes, pfile);
-	assert(szResult == m_pMipmapImages->uiEncodingBitsBytes);
-
-	uint32_t uiInternalFormat = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32GlInternalFormat;
-	uint32_t uiBaseInternalFormat = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32GlBaseInternalFormat;
-	
-	if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC1_RGB8 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC1_RGB8)
-	{
-		m_imageformat = Image::Format::ETC1;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RGB8 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RGB8)
-	{
-		m_imageformat = Image::Format::RGB8;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RGB8A1 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RGB8A1)
-	{
-		m_imageformat = Image::Format::RGB8A1;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RGBA8 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RGBA8)
-	{
-		m_imageformat = Image::Format::RGBA8;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_R11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_R11)
-	{
-		m_imageformat = Image::Format::R11;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_SIGNED_R11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_R11)
-	{
-		m_imageformat = Image::Format::SIGNED_R11;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RG11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RG11)
-	{
-		m_imageformat = Image::Format::RG11;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_SIGNED_RG11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RG11)
-	{
-		m_imageformat = Image::Format::SIGNED_RG11;
-	}
-	else
-	{
-		m_imageformat = Image::Format::UNKNOWN;
-	}
-
-	m_uiSourceWidth = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32PixelWidth;
-	m_uiSourceHeight = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32PixelHeight;
-	m_pMipmapImages->uiExtendedWidth = Image::CalcExtendedDimension((unsigned short)m_uiSourceWidth);
-	m_pMipmapImages->uiExtendedHeight = Image::CalcExtendedDimension((unsigned short)m_uiSourceHeight);
-
-	unsigned int uiBlocks = m_pMipmapImages->uiExtendedWidth * m_pMipmapImages->uiExtendedHeight / 16;
-	Block4x4EncodingBits::Format encodingbitsformat = Image::DetermineEncodingBitsFormat(m_imageformat);
-	unsigned int expectedbytes = uiBlocks * Block4x4EncodingBits::GetBytesPerBlock(encodingbitsformat);
-	assert(expectedbytes == m_pMipmapImages->uiEncodingBitsBytes);
-
-	fclose(pfile);
-}
-
-File::~File()
-{
-	if (m_pMipmapImages != nullptr)
-	{
-		delete [] m_pMipmapImages;
-	}
-
-	if(m_pstrFilename != nullptr)
-	{
-		delete[] m_pstrFilename;
-		m_pstrFilename = nullptr;
-	}
-	if (m_pheader != nullptr)
-	{
-		delete m_pheader;
-		m_pheader = nullptr;
-	}
-}
-
-void File::UseSingleBlock(int a_iPixelX, int a_iPixelY)
-{
-	if (a_iPixelX <= -1 || a_iPixelY <= -1)
-		return;
-	if (a_iPixelX >(int) m_uiSourceWidth)
-	{
-		//if we are using a ktx thats the size of a single block or less
-		//then make sure we use the 4x4 image as the single block
-		if (m_uiSourceWidth <= 4)
-		{
-			a_iPixelX = 0;
-		}
-		else
-		{
-			printf("blockAtHV: H coordinate out of range, capped to image width\n");
-			a_iPixelX = m_uiSourceWidth - 1;
-		}
-	}
-	if (a_iPixelY >(int) m_uiSourceHeight)
-	{
-		//if we are using a ktx thats the size of a single block or less
-		//then make sure we use the 4x4 image as the single block
-		if (m_uiSourceHeight <= 4)
-		{
-			a_iPixelY= 0;
-		}
-		else
-		{
-			printf("blockAtHV: V coordinate out of range, capped to image height\n");
-			a_iPixelY = m_uiSourceHeight - 1;
-		}
-	}
-
-	unsigned int origWidth = m_uiSourceWidth;
-	unsigned int origHeight = m_uiSourceHeight;
-
-	m_uiSourceWidth = 4;
-	m_uiSourceHeight = 4;
-
-	Block4x4EncodingBits::Format encodingbitsformat = Image::DetermineEncodingBitsFormat(m_imageformat);
-	unsigned int uiEncodingBitsBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(encodingbitsformat);
-
-	int numMipmaps = 1;
-	RawImage* pMipmapImages = new RawImage[numMipmaps];
-	pMipmapImages[0].uiExtendedWidth = Image::CalcExtendedDimension((unsigned short)m_uiSourceWidth);
-	pMipmapImages[0].uiExtendedHeight = Image::CalcExtendedDimension((unsigned short)m_uiSourceHeight);
-	pMipmapImages[0].uiEncodingBitsBytes = 0;
-	pMipmapImages[0].paucEncodingBits = std::shared_ptr<unsigned char>(new unsigned char[uiEncodingBitsBytesPerBlock], [](unsigned char *p) { delete[] p; });
-
-	//block position in pixels
-	// remove the bottom 2 bits to get the block coordinates 
-	unsigned int iBlockPosX = (a_iPixelX & 0xFFFFFFFC);
-	unsigned int iBlockPosY = (a_iPixelY & 0xFFFFFFFC);
-
-	int numXBlocks = (origWidth / 4);
-	int numYBlocks = (origHeight / 4);
-	
-
-	// block location 
-	//int iBlockX = (a_iPixelX % 4) == 0 ? a_iPixelX / 4.0f : (a_iPixelX / 4) + 1;
-	//int iBlockY = (a_iPixelY % 4) == 0 ? a_iPixelY / 4.0f : (a_iPixelY / 4) + 1;
-	//m_paucEncodingBits += ((iBlockY * numXBlocks) + iBlockX) * uiEncodingBitsBytesPerBlock;
-
-	
-	unsigned int num = numXBlocks*numYBlocks;
-	unsigned int uiH = 0, uiV = 0;
-	unsigned char* pEncodingBits = m_pMipmapImages[0].paucEncodingBits.get();
-	for (unsigned int uiBlock = 0; uiBlock < num; uiBlock++)
-	{
-		if (uiH == iBlockPosX && uiV == iBlockPosY)
-		{
-			memcpy(pMipmapImages[0].paucEncodingBits.get(),pEncodingBits, uiEncodingBitsBytesPerBlock);
-			break;
-		}
-		pEncodingBits += uiEncodingBitsBytesPerBlock;
-		uiH += 4;
-
-		if (uiH >= origWidth)
-		{
-			uiH = 0;
-			uiV += 4;
-		}
-	}
-
-	delete [] m_pMipmapImages;
-	m_pMipmapImages = pMipmapImages;
-}
-// ----------------------------------------------------------------------------------------------------
-//
-void File::Write()
-{
-
-	FILE *pfile = fopen(m_pstrFilename, "wb");
-	if (pfile == nullptr)
-	{
-		printf("Error: couldn't open Etc file (%s)\n", m_pstrFilename);
-		exit(1);
-	}
-
-	m_pheader->Write(pfile);
-
-	for(unsigned int mip = 0; mip < m_uiNumMipmaps; mip++)
-	{
-		if(m_fileformat == Format::KTX)
-		{
-			// Write u32 image size
-			uint32_t u32ImageSize = m_pMipmapImages[mip].uiEncodingBitsBytes;
-			uint32_t szBytesWritten = fwrite(&u32ImageSize, 1, sizeof(u32ImageSize), pfile);
-			assert(szBytesWritten == sizeof(u32ImageSize));
-		}
-
-		unsigned int iResult = (int)fwrite(m_pMipmapImages[mip].paucEncodingBits.get(), 1, m_pMipmapImages[mip].uiEncodingBitsBytes, pfile);
-		if (iResult != m_pMipmapImages[mip].uiEncodingBitsBytes)
-	{
-		printf("Error: couldn't write Etc file (%s)\n", m_pstrFilename);
-		exit(1);
-		}
-	}
-
-	fclose(pfile);
-
-}
-
-// ----------------------------------------------------------------------------------------------------
-//
-
diff --git a/thirdparty/etc2comp/EtcFile.h b/thirdparty/etc2comp/EtcFile.h
deleted file mode 100644
index 69bf3b2d3a..0000000000
--- a/thirdparty/etc2comp/EtcFile.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColorFloatRGBA.h"
-#include "EtcImage.h"
-#include "Etc.h"
-
-namespace Etc
-{
-	class FileHeader;
-	class SourceImage;
-
-	class File
-	{
-	public:
-
-		enum class Format
-		{
-			INFER_FROM_FILE_EXTENSION,
-			PKM,
-			KTX,
-		};
-
-		File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
-				unsigned char *a_paucEncodingBits, unsigned int a_uiEncodingBitsBytes,
-				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
-				unsigned int a_uiExtendedWidth, unsigned int a_uiExtendedHeight);
-
-		File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
-			unsigned int a_uiNumMipmaps, RawImage *pMipmapImages,
-			unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight );
-
-		File(const char *a_pstrFilename, Format a_fileformat);
-		~File();
-		const char *GetFilename(void) { return m_pstrFilename; }
-
-		void Read(const char *a_pstrFilename);
-		void Write(void);
-
-		inline unsigned int GetSourceWidth(void)
-		{
-			return m_uiSourceWidth;
-		}
-
-		inline unsigned int GetSourceHeight(void)
-		{
-			return m_uiSourceHeight;
-		}
-
-		inline unsigned int GetExtendedWidth(unsigned int mipmapIndex = 0)
-		{
-			if (mipmapIndex < m_uiNumMipmaps)
-			{
-				return m_pMipmapImages[mipmapIndex].uiExtendedWidth;
-			}
-			else
-			{
-				return 0;
-			}
-		}
-
-		inline unsigned int GetExtendedHeight(unsigned int mipmapIndex = 0)
-		{
-			if (mipmapIndex < m_uiNumMipmaps)
-			{
-				return m_pMipmapImages[mipmapIndex].uiExtendedHeight;
-			}
-			else
-			{
-				return 0;
-			}
-		}
-
-		inline Image::Format GetImageFormat()
-		{
-			return m_imageformat;
-		}
-
-		inline unsigned int GetEncodingBitsBytes(unsigned int mipmapIndex = 0)
-		{
-			if (mipmapIndex < m_uiNumMipmaps)
-			{
-				return m_pMipmapImages[mipmapIndex].uiEncodingBitsBytes;
-			}
-			else
-			{
-				return 0;
-			}
-		}
-
-		inline unsigned char*  GetEncodingBits(unsigned int mipmapIndex = 0)
-		{
-			if( mipmapIndex < m_uiNumMipmaps)
-			{
-				return m_pMipmapImages[mipmapIndex].paucEncodingBits.get();
-			}
-			else
-			{
-				return nullptr;
-			}
-		}
-
-		inline unsigned int GetNumMipmaps() 
-		{
-			return m_uiNumMipmaps; 
-		}
-
-		void UseSingleBlock(int a_iPixelX = -1, int a_iPixelY = -1);
-	private:
-
-		char *m_pstrFilename;               // includes directory path and file extension
-		Format m_fileformat;
-		Image::Format m_imageformat;
-		FileHeader *m_pheader;
-		unsigned int m_uiNumMipmaps;
-		RawImage*	 m_pMipmapImages;
-		unsigned int m_uiSourceWidth;
-		unsigned int m_uiSourceHeight;
-	};
-
-}
diff --git a/thirdparty/etc2comp/EtcFileHeader.cpp b/thirdparty/etc2comp/EtcFileHeader.cpp
deleted file mode 100644
index f02fcab011..0000000000
--- a/thirdparty/etc2comp/EtcFileHeader.cpp
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "EtcFileHeader.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	FileHeader_Pkm::FileHeader_Pkm(File *a_pfile)
-	{
-		m_pfile = a_pfile;
-
-		static const char s_acMagicNumberData[4] = { 'P', 'K', 'M', ' ' };
-		static const char s_acVersionData[2] = { '1', '0' };
-
-		for (unsigned int ui = 0; ui < sizeof(s_acMagicNumberData); ui++)
-		{
-			m_data.m_acMagicNumber[ui] = s_acMagicNumberData[ui];
-		}
-
-		for (unsigned int ui = 0; ui < sizeof(s_acVersionData); ui++)
-		{
-			m_data.m_acVersion[ui] = s_acVersionData[ui];
-		}
-
-		m_data.m_ucDataType_msb = 0;        // ETC1_RGB_NO_MIPMAPS
-		m_data.m_ucDataType_lsb = 0;
-
-		m_data.m_ucOriginalWidth_msb = (unsigned char)(m_pfile->GetSourceWidth() >> 8);
-		m_data.m_ucOriginalWidth_lsb = m_pfile->GetSourceWidth() & 0xFF;
-		m_data.m_ucOriginalHeight_msb = (unsigned char)(m_pfile->GetSourceHeight() >> 8);
-		m_data.m_ucOriginalHeight_lsb = m_pfile->GetSourceHeight() & 0xFF;
-
-		m_data.m_ucExtendedWidth_msb = (unsigned char)(m_pfile->GetExtendedWidth() >> 8);
-		m_data.m_ucExtendedWidth_lsb = m_pfile->GetExtendedWidth() & 0xFF;
-		m_data.m_ucExtendedHeight_msb = (unsigned char)(m_pfile->GetExtendedHeight() >> 8);
-		m_data.m_ucExtendedHeight_lsb = m_pfile->GetExtendedHeight() & 0xFF;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	void FileHeader_Pkm::Write(FILE *a_pfile)
-	{
-
-		fwrite(&m_data, sizeof(Data), 1, a_pfile);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	FileHeader_Ktx::FileHeader_Ktx(File *a_pfile)
-	{
-		m_pfile = a_pfile;
-
-		static const uint8_t s_au8Itentfier[12] =
-		{ 
-			0xAB, 0x4B, 0x54, 0x58, // first four bytes of Byte[12] identifier
-			0x20, 0x31, 0x31, 0xBB, // next four bytes of Byte[12] identifier
-			0x0D, 0x0A, 0x1A, 0x0A  // final four bytes of Byte[12] identifier
-		};
-
-		for (unsigned int ui = 0; ui < sizeof(s_au8Itentfier); ui++)
-		{
-			m_data.m_au8Identifier[ui] = s_au8Itentfier[ui];
-		}
-
-		m_data.m_u32Endianness				= 0x04030201;
-		m_data.m_u32GlType					= 0;
-		m_data.m_u32GlTypeSize				= 1;
-		m_data.m_u32GlFormat				= 0;
-
-		switch (m_pfile->GetImageFormat())
-		{
-		case Image::Format::RGB8:
-		case Image::Format::SRGB8:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RGB8;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RGB8;
-			break;
-
-		case Image::Format::RGBA8:
-		case Image::Format::SRGBA8:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RGBA8;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RGBA8;
-			break;
-
-		case Image::Format::RGB8A1:
-		case Image::Format::SRGB8A1:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RGB8A1;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RGB8A1;
-			break;
-		
-		case Image::Format::R11:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_R11;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_R11;
-			break;
-
-		case Image::Format::SIGNED_R11:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_SIGNED_R11;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_R11;
-			break;
-		
-		case Image::Format::RG11:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RG11;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RG11;
-			break;
-
-		case Image::Format::SIGNED_RG11:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_SIGNED_RG11;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RG11;
-			break;
-
-		default:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC1_RGB8;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC1_RGB8;
-			break;
-		}
-
-		m_data.m_u32PixelWidth				= 0;
-		m_data.m_u32PixelHeight				= 0;
-		m_data.m_u32PixelDepth				= 0;
-		m_data.m_u32NumberOfArrayElements	= 0;
-		m_data.m_u32NumberOfFaces			= 0;
-		m_data.m_u32BytesOfKeyValueData		= 0;
-
-		m_pkeyvaluepair = nullptr;
-
-		m_u32Images = 0;
-		m_u32KeyValuePairs = 0;
-
-		m_data.m_u32PixelWidth = m_pfile->GetSourceWidth();
-		m_data.m_u32PixelHeight = m_pfile->GetSourceHeight();
-		m_data.m_u32PixelDepth = 0;
-		m_data.m_u32NumberOfArrayElements = 0;
-		m_data.m_u32NumberOfFaces = 1;
-		m_data.m_u32NumberOfMipmapLevels = m_pfile->GetNumMipmaps();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	void FileHeader_Ktx::Write(FILE *a_pfile)
-	{
-		size_t szBytesWritten;
-
-		// Write header
-		szBytesWritten = fwrite(&m_data, 1, sizeof(Data), a_pfile);
-		assert(szBytesWritten == sizeof(Data));
-
-		// Write KeyAndValuePairs
-		if (m_u32KeyValuePairs)
-		{
-			fwrite(m_pkeyvaluepair, m_pkeyvaluepair->u32KeyAndValueByteSize, 1, a_pfile);
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	FileHeader_Ktx::Data *FileHeader_Ktx::GetData()
-	{
-		return &m_data;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcFileHeader.h b/thirdparty/etc2comp/EtcFileHeader.h
deleted file mode 100644
index 55a9cb5d9d..0000000000
--- a/thirdparty/etc2comp/EtcFileHeader.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcFile.h"
-#include <stdio.h>
-#include <inttypes.h>
-
-namespace Etc
-{
-
-	class Image;
-
-	class FileHeader
-	{
-	public:
-
-		virtual void Write(FILE *a_pfile) = 0;
-		File GetFile();
-		virtual ~FileHeader(void) {}
-	protected:
-
-		File *m_pfile;
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-    class FileHeader_Pkm : public FileHeader
-    {
-    public:
-
-		FileHeader_Pkm(File *a_pfile);
-
-		virtual void Write(FILE *a_pfile);
-		virtual ~FileHeader_Pkm(void) {}
-	private:
-
-		typedef struct
-		{
-			char m_acMagicNumber[4];
-			char m_acVersion[2];
-			unsigned char m_ucDataType_msb;             // e.g. ETC1_RGB_NO_MIPMAPS
-			unsigned char m_ucDataType_lsb;
-			unsigned char m_ucExtendedWidth_msb;     //  padded to 4x4 blocks
-			unsigned char m_ucExtendedWidth_lsb;
-			unsigned char m_ucExtendedHeight_msb;    //  padded to 4x4 blocks
-			unsigned char m_ucExtendedHeight_lsb;
-			unsigned char m_ucOriginalWidth_msb;
-			unsigned char m_ucOriginalWidth_lsb;
-			unsigned char m_ucOriginalHeight_msb;
-			unsigned char m_ucOriginalHeight_lsb;
-		} Data;
-
-		Data m_data;
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-    class FileHeader_Ktx : public FileHeader
-    {
-    public:
-
-		typedef struct
-		{
-			uint32_t	u32KeyAndValueByteSize;
-		} KeyValuePair;
-
-		typedef struct
-		{
-			uint8_t m_au8Identifier[12];
-			uint32_t m_u32Endianness;
-			uint32_t m_u32GlType;
-			uint32_t m_u32GlTypeSize;
-			uint32_t m_u32GlFormat;
-			uint32_t m_u32GlInternalFormat;
-			uint32_t m_u32GlBaseInternalFormat;
-			uint32_t m_u32PixelWidth;
-			uint32_t m_u32PixelHeight;
-			uint32_t m_u32PixelDepth;
-			uint32_t m_u32NumberOfArrayElements;
-			uint32_t m_u32NumberOfFaces;
-			uint32_t m_u32NumberOfMipmapLevels;
-			uint32_t m_u32BytesOfKeyValueData;
-		} Data;
-
-		enum class InternalFormat
-		{
-			ETC1_RGB8 = 0x8D64,
-			ETC1_ALPHA8 = ETC1_RGB8,
-			//
-			ETC2_R11 = 0x9270,
-			ETC2_SIGNED_R11 = 0x9271,
-			ETC2_RG11 = 0x9272,
-			ETC2_SIGNED_RG11 = 0x9273,
-			ETC2_RGB8 = 0x9274,
-			ETC2_SRGB8 = 0x9275,
-			ETC2_RGB8A1 = 0x9276,
-			ETC2_SRGB8_PUNCHTHROUGH_ALPHA1 = 0x9277,
-			ETC2_RGBA8 = 0x9278
-		};
-
-		enum class BaseInternalFormat
-		{
-			ETC2_R11 = 0x1903,
-			ETC2_RG11 = 0x8227,
-			ETC1_RGB8 = 0x1907,
-			ETC1_ALPHA8 = ETC1_RGB8,
-			//
-			ETC2_RGB8 = 0x1907,
-			ETC2_RGB8A1 = 0x1908,
-			ETC2_RGBA8 = 0x1908,
-		};
-
-		FileHeader_Ktx(File *a_pfile);
-
-		virtual void Write(FILE *a_pfile);
-		virtual ~FileHeader_Ktx(void) {}
-
-		void AddKeyAndValue(KeyValuePair *a_pkeyvaluepair);
-
-		Data* GetData();
-
-	private:
-
-		Data m_data;
-		KeyValuePair *m_pkeyvaluepair;
-		
-		uint32_t m_u32Images;
-		uint32_t m_u32KeyValuePairs;
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcFilter.cpp b/thirdparty/etc2comp/EtcFilter.cpp
deleted file mode 100644
index 1ec8acdf3f..0000000000
--- a/thirdparty/etc2comp/EtcFilter.cpp
+++ /dev/null
@@ -1,404 +0,0 @@
-#include <stdlib.h>
-#include <math.h>
-#include "EtcFilter.h"
-
-
-namespace Etc
-{
-
-static const double PiConst = 3.14159265358979323846;
-
-inline double sinc(double x) 
-{
-    if ( x == 0.0 ) 
-    {
-        return 1.0;
-    }
-
-    return sin(PiConst * x) / (PiConst * x);
-}
-
-//inline float sincf( float x )
-//{
-//    x *= F_PI;
-//    if (x < 0.01f && x > -0.01f)
-//    {
-//        return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f);
-//    }
-//
-//    return sinf(x)/x;
-//}
-//
-//double bessel0(double x) 
-//{
-//    const double EPSILON_RATIO = 1E-16;
-//    double xh, sum, pow, ds;
-//    int k;
-//
-//    xh = 0.5 * x;
-//    sum = 1.0;
-//    pow = 1.0;
-//    k = 0;
-//    ds = 1.0;
-//    while (ds > sum * EPSILON_RATIO) 
-//    {
-//        ++k;
-//        pow = pow * (xh / k);
-//        ds = pow * pow;
-//        sum = sum + ds;
-//    }
-//
-//    return sum;
-//}
-
-//**--------------------------------------------------------------------------
-//** Name: kaiser(double alpha, double half_width, double x) 
-//** Returns:
-//** Description: Alpha controls shape of filter.  We are using 4.
-//**--------------------------------------------------------------------------
-//inline double kaiser(double alpha, double half_width, double x) 
-//{
-//    double ratio = (x / half_width);
-//    return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha);
-//}
-//
-//float Filter_Lanczos4Sinc(float x)
-//{
-//    if (x <= -4.0f || x >= 4.0f)    // half-width of 4
-//    {
-//        return 0.0;
-//    }
-//
-//    return sinc(0.875f * x) * sinc(0.25f * x);
-//}
-//
-//double Filter_Kaiser4( double t )
-//{
-//    return kaiser( 4.0, 3.0, t);
-//}
-//
-//double Filter_KaiserOptimal( double t )
-//{
-//    return kaiser( 8.93, 3.0f, t);
-//}                  
-
-double FilterLanczos3( double t )
-{
-	if ( t <= -3.0 || t >= 3.0 ) 
-    {
-        return 0.0;
-    }
-
-    return sinc( t ) * sinc( t / 3.0 );
-}
-
-double FilterBox( double t )
-{
-    return ( t > -0.5 && t < 0.5) ? 1.0 : 0.0;
-}
-
-double FilterLinear( double t )
-{
-	if (t < 0.0) t = -t;
-
-    return (t < 1.0) ? (1.0 - t) : 0.0;
-}
-
-
-//**--------------------------------------------------------------------------
-//** Name: CalcContributions( int srcSize, 
-//**                          int destSize, 
-//**                          double filterSize, 
-//**						  bool wrap,
-//**                          double (*FilterProc)(double), 
-//**                          FilterWeights contrib[] )
-//** Returns: void
-//** Description:
-//**--------------------------------------------------------------------------
-void CalcContributions( int srcSize, int destSize, double filterSize, bool wrap, double (*FilterProc)(double), FilterWeights contrib[] )
-{
-    double scale;
-    double filterScale;
-    double center;
-    double totalWeight;
-    double weight;
-    int   iRight;
-    int   iLeft;
-    int   iDest;
-
-    scale = (double)destSize / srcSize;
-    if ( scale < 1.0 )
-    {
-        filterSize = filterSize / scale;
-        filterScale = scale;
-    }
-    else
-    {
-        filterScale = 1.0;
-    }
-
-    if ( filterSize > (double)MaxFilterSize )
-    {
-        filterSize = (double)MaxFilterSize;
-    }
-
-    for ( iDest = 0; iDest < destSize; ++iDest )
-    {
-        center = (double)iDest / scale;
-
-        iLeft = (int)ceil(center - filterSize);
-		iRight = (int)floor(center + filterSize);
-
-		if ( !wrap )
-		{
-        if ( iLeft < 0 )
-        {
-            iLeft = 0;
-        }
-
-        if ( iRight >= srcSize )
-        {
-            iRight = srcSize - 1;
-        }
-		}
-
-		int numWeights = iRight - iLeft + 1;
-
-        contrib[iDest].first = iLeft;
-        contrib[iDest].numWeights = numWeights;
-
-        totalWeight = 0;
-		double t = ((double)iLeft - center) * filterScale;
-		for (int i = 0; i < numWeights; i++)
-        {
-			weight = (*FilterProc)(t) * filterScale;
-            totalWeight += weight;
-			contrib[iDest].weight[i] = weight;
-			t += filterScale;
-        }
-
-        //**--------------------------------------------------------
-        //** Normalize weights by dividing by the sum of the weights
-        //**--------------------------------------------------------
-        if ( totalWeight > 0.0 )
-        {   
-            for ( int i = 0; i < numWeights; i++)
-            {
-                contrib[iDest].weight[i] /= totalWeight;
-            }
-        }
-    }
-}
-
-//**-------------------------------------------------------------------------
-//** Name: Filter_TwoPass( RGBCOLOR *pSrcImage, 
-//**                       int srcWidth, int srcHeight, 
-//**                       RGBCOLOR *pDestImage, 
-//**                       int destWidth, int destHeight, 
-//**                       double (*FilterProc)(double) )
-//** Returns: 0 on failure and 1 on success
-//** Description: Filters a 2d image with a two pass filter by averaging the
-//**    weighted contributions of the pixels within the filter region.  The
-//**    contributions are determined by a weighting function parameter.
-//**-------------------------------------------------------------------------
-int FilterTwoPass( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-                    RGBCOLOR *pDestImage, int destWidth, int destHeight, unsigned int wrapFlags, double (*FilterProc)(double) )
-{
-    FilterWeights *contrib;
-    RGBCOLOR *pPixel;
-    RGBCOLOR *pSrcPixel;
-    RGBCOLOR *pTempImage;
-    int iRow;
-    int iCol;
-    int iSrcCol;
-    int iSrcRow;
-    int iWeight;
-    double dRed;
-    double dGreen;
-    double dBlue;
-    double dAlpha;
-    double filterSize = 3.0;
-
-	int maxDim = (srcWidth>srcHeight)?srcWidth:srcHeight;
-	contrib = (FilterWeights*)malloc(maxDim * sizeof(FilterWeights));
-
-	//**------------------------------------------------------------------------
-    //** Need to create a temporary image to stuff the horizontally scaled image
-    //**------------------------------------------------------------------------
-    pTempImage = (RGBCOLOR *)malloc( destWidth * srcHeight * sizeof(RGBCOLOR) );
-    if ( pTempImage == NULL )
-    {
-        // -- GODOT start --
-        free( contrib );
-        // -- GODOT end --
-        return 0;
-    }
-
-    //**-------------------------------------------------------
-    //** Horizontally filter the image into the temporary image
-    //**-------------------------------------------------------
-	bool bWrapHorizontal = !!(wrapFlags&FILTER_WRAP_X);
-	CalcContributions( srcWidth, destWidth, filterSize, bWrapHorizontal, FilterProc, contrib );
-    for ( iRow = 0; iRow < srcHeight; iRow++ )
-    {
-        for ( iCol = 0; iCol < destWidth; iCol++ )
-        {
-            dRed   = 0;
-            dGreen = 0;
-            dBlue  = 0;
-            dAlpha = 0;
-
-            for ( iWeight = 0; iWeight < contrib[iCol].numWeights; iWeight++ )
-            {
-                iSrcCol = iWeight + contrib[iCol].first;
-				if (bWrapHorizontal)
-				{
-					iSrcCol = (iSrcCol < 0) ? (srcWidth + iSrcCol) : (iSrcCol >= srcWidth) ? (iSrcCol - srcWidth) : iSrcCol;
-				}
-                pSrcPixel = pSrcImage + (iRow * srcWidth) + iSrcCol;
-                dRed   += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[0];
-                dGreen += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[1];
-                dBlue  += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[2];
-                dAlpha += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[3];
-            }
-
-            pPixel = pTempImage + (iRow * destWidth) + iCol;
-			pPixel->rgba[0] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dRed)));
-			pPixel->rgba[1] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dGreen)));
-			pPixel->rgba[2] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dBlue)));
-			pPixel->rgba[3] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dAlpha)));
-        }
-    }
-
-    //**-------------------------------------------------------
-    //** Vertically filter the image into the destination image
-    //**-------------------------------------------------------
-	bool bWrapVertical = !!(wrapFlags&FILTER_WRAP_Y);
-	CalcContributions(srcHeight, destHeight, filterSize, bWrapVertical, FilterProc, contrib);
-    for ( iCol = 0; iCol < destWidth; iCol++ )
-    {
-        for ( iRow = 0; iRow < destHeight; iRow++ )
-        {
-            dRed   = 0;
-            dGreen = 0;
-            dBlue  = 0;
-            dAlpha = 0;
-
-            for ( iWeight = 0; iWeight < contrib[iRow].numWeights; iWeight++ )
-            {
-                iSrcRow = iWeight + contrib[iRow].first;
-				if (bWrapVertical)
-				{
-					iSrcRow = (iSrcRow < 0) ? (srcHeight + iSrcRow) : (iSrcRow >= srcHeight) ? (iSrcRow - srcHeight) : iSrcRow;
-				}
-                pSrcPixel = pTempImage + (iSrcRow * destWidth) + iCol;
-                dRed   += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[0];
-                dGreen += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[1];
-                dBlue  += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[2];
-                dAlpha += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[3];
-            }
-
-            pPixel = pDestImage + (iRow * destWidth) + iCol;
-            pPixel->rgba[0]   = (unsigned char)(std::max( 0.0, std::min( 255.0, dRed)));
-            pPixel->rgba[1] = (unsigned char)(std::max( 0.0, std::min( 255.0, dGreen)));
-            pPixel->rgba[2]  = (unsigned char)(std::max( 0.0, std::min( 255.0, dBlue)));
-            pPixel->rgba[3] = (unsigned char)(std::max( 0.0, std::min( 255.0, dAlpha)));
-        }
-    }
-
-    free( pTempImage );
-	free( contrib );
-
-    return 1;
-}
-
-//**-------------------------------------------------------------------------
-//** Name: FilterResample(RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-//**                       RGBCOLOR *pDstImage, int dstWidth, int dstHeight)
-//** Returns: 1
-//** Description: This function runs a 2d box filter over the srouce image
-//** to produce the destination image.
-//**-------------------------------------------------------------------------
-void FilterResample( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-                     RGBCOLOR *pDstImage, int dstWidth, int dstHeight )
-{
-    int iRow;
-    int iCol;
-    int iSampleRow;
-    int iSampleCol;
-    int iFirstSampleRow;
-    int iFirstSampleCol;
-    int iLastSampleRow;
-    int iLastSampleCol;
-    int red;
-    int green;
-    int blue;
-    int alpha;
-    int samples;
-    float xScale;
-    float yScale;
-
-    RGBCOLOR *pSrcPixel;
-    RGBCOLOR *pDstPixel;
-
-    xScale = (float)srcWidth / dstWidth;
-    yScale = (float)srcHeight / dstHeight;
-
-    for ( iRow = 0; iRow < dstHeight; iRow++ )
-    {
-        for ( iCol = 0; iCol < dstWidth; iCol++ )
-        {
-            iFirstSampleRow = (int)(iRow * yScale);
-            iLastSampleRow = (int)ceil(iFirstSampleRow + yScale - 1);
-            if ( iLastSampleRow >= srcHeight )
-            {
-                iLastSampleRow = srcHeight - 1;
-            }
-
-            iFirstSampleCol = (int)(iCol * xScale);
-            iLastSampleCol = (int)ceil(iFirstSampleCol + xScale - 1);
-            if ( iLastSampleCol >= srcWidth )
-            {
-                iLastSampleCol = srcWidth - 1;
-            }
-
-            samples = 0;
-            red     = 0;
-            green   = 0;
-            blue    = 0;
-            alpha   = 0;
-            for ( iSampleRow = iFirstSampleRow; iSampleRow <= iLastSampleRow; iSampleRow++ )
-            {
-                for ( iSampleCol = iFirstSampleCol; iSampleCol <= iLastSampleCol; iSampleCol++ )
-                {
-                    pSrcPixel = pSrcImage + iSampleRow * srcWidth + iSampleCol;
-                    red   += pSrcPixel->rgba[0];
-                    green += pSrcPixel->rgba[1];
-                    blue  += pSrcPixel->rgba[2];
-                    alpha += pSrcPixel->rgba[3];
-
-                    samples++;
-                }
-            }
-
-            pDstPixel = pDstImage + iRow * dstWidth + iCol;
-            if ( samples > 0 )
-            {
-                pDstPixel->rgba[0] = static_cast<uint8_t>(red / samples);
-                pDstPixel->rgba[1] = static_cast<uint8_t>(green / samples);
-                pDstPixel->rgba[2] = static_cast<uint8_t>(blue / samples);
-                pDstPixel->rgba[3] = static_cast<uint8_t>(alpha / samples);
-            }
-            else
-            {
-                pDstPixel->rgba[0] = static_cast<uint8_t>(red);
-                pDstPixel->rgba[1] = static_cast<uint8_t>(green);
-                pDstPixel->rgba[2] = static_cast<uint8_t>(blue);
-                pDstPixel->rgba[3] = static_cast<uint8_t>(alpha);
-            }
-        }
-    }
-}
-
-
-}
-\ No newline at end of file
diff --git a/thirdparty/etc2comp/EtcFilter.h b/thirdparty/etc2comp/EtcFilter.h
deleted file mode 100644
index fcf125c6df..0000000000
--- a/thirdparty/etc2comp/EtcFilter.h
+++ /dev/null
@@ -1,244 +0,0 @@
-#pragma once
-#include <stdint.h>
-#include <algorithm>
-
-namespace Etc
-{
-
-enum FilterEnums
-{
-	MaxFilterSize = 32
-};
-
-enum WrapFlags
-{
-	FILTER_WRAP_NONE = 0,
-	FILTER_WRAP_X = 0x1,
-	FILTER_WRAP_Y = 0x2
-};
-
-typedef struct tagFilterWeights
-{
-	int   first;
-	int   numWeights;
-	double weight[MaxFilterSize * 2 + 1];
-} FilterWeights;
-
-typedef struct tagRGBCOLOR
-{
-	union
-	{
-		uint32_t ulColor;
-		uint8_t rgba[4];
-	};
-} RGBCOLOR;
-
-
-double FilterBox( double t );
-double FilterLinear( double t );
-double FilterLanczos3( double t );
-
-int FilterTwoPass( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-                    RGBCOLOR *pDestImage, int destWidth, int destHeight, unsigned int wrapFlags, double (*FilterProc)(double) );
-void FilterResample( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-                     RGBCOLOR *pDstImage, int dstWidth, int dstHeight );
-
-
-void CalcContributions(int srcSize, int destSize, double filterSize, bool wrap, double(*FilterProc)(double), FilterWeights contrib[]);
-
-template <typename T>
-void FilterResample(T *pSrcImage, int srcWidth, int srcHeight, T *pDstImage, int dstWidth, int dstHeight)
-{
-	float xScale;
-	float yScale;
-
-	T *pSrcPixel;
-	T *pDstPixel;
-
-	xScale = (float)srcWidth / dstWidth;
-	yScale = (float)srcHeight / dstHeight;
-
-	for (int iRow = 0; iRow < dstHeight; iRow++)
-	{
-		for (int iCol = 0; iCol < dstWidth; iCol++)
-		{
-			int samples;
-			int iFirstSampleRow;
-			int iFirstSampleCol;
-			int iLastSampleRow;
-			int iLastSampleCol;
-			float red;
-			float green;
-			float blue;
-			float alpha;
-
-			iFirstSampleRow = (int)(iRow * yScale);
-			iLastSampleRow = (int)ceil(iFirstSampleRow + yScale - 1);
-			if (iLastSampleRow >= srcHeight)
-			{
-				iLastSampleRow = srcHeight - 1;
-			}
-
-			iFirstSampleCol = (int)(iCol * xScale);
-			iLastSampleCol = (int)ceil(iFirstSampleCol + xScale - 1);
-			if (iLastSampleCol >= srcWidth)
-			{
-				iLastSampleCol = srcWidth - 1;
-			}
-
-			samples = 0;
-			red = 0.f;
-			green = 0.f;
-			blue = 0.f;
-			alpha = 0.f;
-			for (int iSampleRow = iFirstSampleRow; iSampleRow <= iLastSampleRow; iSampleRow++)
-			{
-				for (int iSampleCol = iFirstSampleCol; iSampleCol <= iLastSampleCol; iSampleCol++)
-				{
-					pSrcPixel = pSrcImage + (iSampleRow * srcWidth + iSampleCol) * 4;
-					red += static_cast<float>(pSrcPixel[0]);
-					green += static_cast<float>(pSrcPixel[1]);
-					blue += static_cast<float>(pSrcPixel[2]);
-					alpha += static_cast<float>(pSrcPixel[3]);
-
-					samples++;
-				}
-			}
-
-			pDstPixel = pDstImage + (iRow * dstWidth + iCol) * 4;
-			if (samples > 0)
-			{
-				pDstPixel[0] = static_cast<T>(red / samples);
-				pDstPixel[1] = static_cast<T>(green / samples);
-				pDstPixel[2] = static_cast<T>(blue / samples);
-				pDstPixel[3] = static_cast<T>(alpha / samples);
-			}
-			else
-			{
-				pDstPixel[0] = static_cast<T>(red);
-				pDstPixel[1] = static_cast<T>(green);
-				pDstPixel[2] = static_cast<T>(blue);
-				pDstPixel[3] = static_cast<T>(alpha);
-			}
-		}
-	}
-
-}
-
-//**-------------------------------------------------------------------------
-//** Name: Filter_TwoPass( RGBCOLOR *pSrcImage, 
-//**                       int srcWidth, int srcHeight, 
-//**                       RGBCOLOR *pDestImage, 
-//**                       int destWidth, int destHeight, 
-//**                       double (*FilterProc)(double) )
-//** Returns: 0 on failure and 1 on success
-//** Description: Filters a 2d image with a two pass filter by averaging the
-//**    weighted contributions of the pixels within the filter region.  The
-//**    contributions are determined by a weighting function parameter.
-//**-------------------------------------------------------------------------
-template <typename T>
-int FilterTwoPass(T *pSrcImage, int srcWidth, int srcHeight,
-	T *pDestImage, int destWidth, int destHeight, unsigned int wrapFlags, double(*FilterProc)(double))
-{
-	const int numComponents = 4;
-	FilterWeights *contrib;
-	T *pPixel;
-	T *pTempImage;
-	double dRed;
-	double dGreen;
-	double dBlue;
-	double dAlpha;
-	double filterSize = 3.0;
-
-	int maxDim = (srcWidth>srcHeight) ? srcWidth : srcHeight;
-	contrib = new FilterWeights[maxDim];
-
-	//**------------------------------------------------------------------------
-	//** Need to create a temporary image to stuff the horizontally scaled image
-	//**------------------------------------------------------------------------
-	pTempImage = new T[destWidth * srcHeight * numComponents];
-	if (pTempImage == NULL)
-	{
-		return 0;
-	}
-
-	//**-------------------------------------------------------
-	//** Horizontally filter the image into the temporary image
-	//**-------------------------------------------------------
-	bool bWrapHorizontal = !!(wrapFlags&FILTER_WRAP_X);
-	CalcContributions(srcWidth, destWidth, filterSize, bWrapHorizontal, FilterProc, contrib);
-	for (int iRow = 0; iRow < srcHeight; iRow++)
-	{
-		for (int iCol = 0; iCol < destWidth; iCol++)
-		{
-			dRed = 0;
-			dGreen = 0;
-			dBlue = 0;
-			dAlpha = 0;
-
-			for (int iWeight = 0; iWeight < contrib[iCol].numWeights; iWeight++)
-			{
-				int iSrcCol = iWeight + contrib[iCol].first;
-				if(bWrapHorizontal)
-				{
-					iSrcCol = (iSrcCol < 0)?(srcWidth+iSrcCol):(iSrcCol >= srcWidth)?(iSrcCol-srcWidth):iSrcCol;
-				}
-				T* pSrcPixel = pSrcImage + ((iRow * srcWidth) + iSrcCol)*numComponents;
-				dRed += contrib[iCol].weight[iWeight] * pSrcPixel[0];
-				dGreen += contrib[iCol].weight[iWeight] * pSrcPixel[1];
-				dBlue += contrib[iCol].weight[iWeight] * pSrcPixel[2];
-				dAlpha += contrib[iCol].weight[iWeight] * pSrcPixel[3];
-			}
-
-			pPixel = pTempImage + ((iRow * destWidth) + iCol)*numComponents;
-			pPixel[0] = static_cast<T>(std::max(0.0, std::min(255.0, dRed)));
-			pPixel[1] = static_cast<T>(std::max(0.0, std::min(255.0, dGreen)));
-			pPixel[2] = static_cast<T>(std::max(0.0, std::min(255.0, dBlue)));
-			pPixel[3] = static_cast<T>(std::max(0.0, std::min(255.0, dAlpha)));
-		}
-	}
-
-	//**-------------------------------------------------------
-	//** Vertically filter the image into the destination image
-	//**-------------------------------------------------------
-	bool bWrapVertical = !!(wrapFlags&FILTER_WRAP_Y);
-	CalcContributions(srcHeight, destHeight, filterSize, bWrapVertical, FilterProc, contrib);
-	for (int iCol = 0; iCol < destWidth; iCol++)
-	{
-		for (int iRow = 0; iRow < destHeight; iRow++)
-		{
-			dRed = 0;
-			dGreen = 0;
-			dBlue = 0;
-			dAlpha = 0;
-
-			for (int iWeight = 0; iWeight < contrib[iRow].numWeights; iWeight++)
-			{
-				int iSrcRow = iWeight + contrib[iRow].first;
-				if (bWrapVertical)
-				{
-					iSrcRow = (iSrcRow < 0) ? (srcHeight + iSrcRow) : (iSrcRow >= srcHeight) ? (iSrcRow - srcHeight) : iSrcRow;
-				}
-				T* pSrcPixel = pTempImage + ((iSrcRow * destWidth) + iCol)*numComponents;
-				dRed += contrib[iRow].weight[iWeight] * pSrcPixel[0];
-				dGreen += contrib[iRow].weight[iWeight] * pSrcPixel[1];
-				dBlue += contrib[iRow].weight[iWeight] * pSrcPixel[2];
-				dAlpha += contrib[iRow].weight[iWeight] * pSrcPixel[3];
-			}
-
-			pPixel = pDestImage + ((iRow * destWidth) + iCol)*numComponents;
-			pPixel[0] = static_cast<T>(std::max(0.0, std::min(255.0, dRed)));
-			pPixel[1] = static_cast<T>(std::max(0.0, std::min(255.0, dGreen)));
-			pPixel[2] = static_cast<T>(std::max(0.0, std::min(255.0, dBlue)));
-			pPixel[3] = static_cast<T>(std::max(0.0, std::min(255.0, dAlpha)));
-		}
-	}
-
-	delete[] pTempImage;
-	delete[] contrib;
-
-	return 1;
-}
-
-
-}
-\ No newline at end of file
diff --git a/thirdparty/etc2comp/EtcImage.cpp b/thirdparty/etc2comp/EtcImage.cpp
deleted file mode 100644
index 7a1058844d..0000000000
--- a/thirdparty/etc2comp/EtcImage.cpp
+++ /dev/null
@@ -1,685 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcImage.cpp
-
-Image is an array of 4x4 blocks that represent the encoding of the source image
-
-*/
-
-#include "EtcConfig.h"
-
-#include <stdlib.h>
-
-#include "EtcImage.h"
-
-#include "Etc.h"
-#include "EtcBlock4x4.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcSortedBlockList.h"
-
-#if ETC_WINDOWS
-#include <windows.h>
-#endif
-#include <ctime>
-#include <chrono>
-#include <future>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-// fix conflict with Block4x4::AlphaMix
-#ifdef OPAQUE
-#undef OPAQUE
-#endif
-#ifdef TRANSPARENT
-#undef TRANSPARENT
-#endif
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Image::Image(void)
-	{
-		m_encodingStatus = EncodingStatus::SUCCESS;
-		m_warningsToCapture = EncodingStatus::SUCCESS;
-		m_pafrgbaSource = nullptr;
-
-		m_pablock = nullptr;
-
-		m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
-		m_uiEncodingBitsBytes = 0;
-		m_paucEncodingBits = nullptr;
-
-		m_format = Format::UNKNOWN;
-		m_iNumOpaquePixels = 0;
-		m_iNumTranslucentPixels = 0;
-		m_iNumTransparentPixels = 0;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// constructor using source image
-	// used to set state before Encode() is called
-	//
-	Image::Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth,
-					unsigned int a_uiSourceHeight, 
-					ErrorMetric a_errormetric)
-	{
-		m_encodingStatus = EncodingStatus::SUCCESS;
-		m_warningsToCapture = EncodingStatus::SUCCESS;
-		m_pafrgbaSource = (ColorFloatRGBA *) a_pafSourceRGBA;
-		m_uiSourceWidth = a_uiSourceWidth;
-		m_uiSourceHeight = a_uiSourceHeight;
-
-		m_uiExtendedWidth = CalcExtendedDimension((unsigned short)m_uiSourceWidth);
-		m_uiExtendedHeight = CalcExtendedDimension((unsigned short)m_uiSourceHeight);
-
-		m_uiBlockColumns = m_uiExtendedWidth >> 2;
-		m_uiBlockRows = m_uiExtendedHeight >> 2;
-
-		m_pablock = new Block4x4[GetNumberOfBlocks()];
-		assert(m_pablock);
-
-		m_format = Format::UNKNOWN;
-
-		m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
-		m_uiEncodingBitsBytes = 0;
-		m_paucEncodingBits = nullptr;
-
-		m_errormetric = a_errormetric;
-		m_fEffort = 0.0f;
-
-		m_iEncodeTime_ms = -1;
-
-		m_iNumOpaquePixels = 0;
-		m_iNumTranslucentPixels = 0;
-		m_iNumTransparentPixels = 0;
-		m_bVerboseOutput = false;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// constructor using encoding bits
-	// recreates encoding state using a previously encoded image
-	//
-	Image::Image(Format a_format,
-					unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
-					unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes,
-					Image *a_pimageSource, ErrorMetric a_errormetric)
-	{
-		m_encodingStatus = EncodingStatus::SUCCESS;
-		m_pafrgbaSource = nullptr;
-		m_uiSourceWidth = a_uiSourceWidth;
-		m_uiSourceHeight = a_uiSourceHeight;
-
-		m_uiExtendedWidth = CalcExtendedDimension((unsigned short)m_uiSourceWidth);
-		m_uiExtendedHeight = CalcExtendedDimension((unsigned short)m_uiSourceHeight);
-
-		m_uiBlockColumns = m_uiExtendedWidth >> 2;
-		m_uiBlockRows = m_uiExtendedHeight >> 2;
-
-		unsigned int uiBlocks = GetNumberOfBlocks();
-
-		m_pablock = new Block4x4[uiBlocks];
-		assert(m_pablock);
-
-		m_format = a_format;
-
-		m_iNumOpaquePixels = 0;
-		m_iNumTranslucentPixels = 0;
-		m_iNumTransparentPixels = 0;
-		
-		m_encodingbitsformat = DetermineEncodingBitsFormat(m_format);
-		if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN)
-		{
-			AddToEncodingStatus(ERROR_UNKNOWN_FORMAT);
-			return;
-		}
-		m_uiEncodingBitsBytes = a_uiEncodingBitsBytes;
-		m_paucEncodingBits = a_paucEncidingBits;
-
-		m_errormetric = a_errormetric;
-		m_fEffort = 0.0f;
-		m_bVerboseOutput = false;
-		m_iEncodeTime_ms = -1;
-		
-		unsigned char *paucEncodingBits = m_paucEncodingBits;
-		unsigned int uiEncodingBitsBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
-
-		unsigned int uiH = 0;
-		unsigned int uiV = 0;
-		for (unsigned int uiBlock = 0; uiBlock < uiBlocks; uiBlock++)
-		{
-			m_pablock[uiBlock].InitFromEtcEncodingBits(a_format, uiH, uiV, paucEncodingBits, 
-														a_pimageSource, a_errormetric);
-			paucEncodingBits += uiEncodingBitsBytesPerBlock;
-			uiH += 4;
-			if (uiH >= m_uiSourceWidth)
-			{
-				uiH = 0;
-				uiV += 4;
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Image::~Image(void)
-	{
-		if (m_pablock != nullptr)
-		{
-			delete[] m_pablock;
-			m_pablock = nullptr;
-		}
-
-		/*if (m_paucEncodingBits != nullptr)
-		{
-			delete[] m_paucEncodingBits;
-			m_paucEncodingBits = nullptr;
-		}*/
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// encode an image
-	// create a set of encoding bits that conforms to a_format
-	// find best fit using a_errormetric
-	// explore a range of possible encodings based on a_fEffort (range = [0:100])
-	// speed up process using a_uiJobs as the number of process threads (a_uiJobs must not excede a_uiMaxJobs)
-	//
-	Image::EncodingStatus Image::Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, unsigned int a_uiJobs, unsigned int a_uiMaxJobs)
-	{
-
-		auto start = std::chrono::steady_clock::now();
-		
-		m_encodingStatus = EncodingStatus::SUCCESS;
-
-		m_format = a_format;
-		m_errormetric = a_errormetric;
-		m_fEffort = a_fEffort;
-
-		if (m_errormetric < 0 || m_errormetric > ERROR_METRICS)
-		{
-			AddToEncodingStatus(ERROR_UNKNOWN_ERROR_METRIC);
-			return m_encodingStatus;
-		}
-
-		if (m_fEffort < ETCCOMP_MIN_EFFORT_LEVEL)
-		{
-			AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE);
-			m_fEffort = ETCCOMP_MIN_EFFORT_LEVEL;
-		}
-		else if (m_fEffort > ETCCOMP_MAX_EFFORT_LEVEL)
-		{
-			AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE);
-			m_fEffort = ETCCOMP_MAX_EFFORT_LEVEL;
-		}
-		if (a_uiJobs < 1)
-		{
-			a_uiJobs = 1;
-			AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE);
-		}
-		else if (a_uiJobs > a_uiMaxJobs)
-		{
-			a_uiJobs = a_uiMaxJobs;
-			AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE);
-		}
-
-		m_encodingbitsformat = DetermineEncodingBitsFormat(m_format);
-
-		if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN)
-		{
-			AddToEncodingStatus(ERROR_UNKNOWN_FORMAT);
-			return m_encodingStatus;
-		}
-
-		assert(m_paucEncodingBits == nullptr);
-		m_uiEncodingBitsBytes = GetNumberOfBlocks() * Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
-		m_paucEncodingBits = new unsigned char[m_uiEncodingBitsBytes];
-
-		InitBlocksAndBlockSorter();
-
-
-		std::future<void> *handle = new std::future<void>[a_uiMaxJobs];
-
-		unsigned int uiNumThreadsNeeded = 0;
-		unsigned int uiUnfinishedBlocks = GetNumberOfBlocks();
-
-		uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? uiUnfinishedBlocks : a_uiJobs;
-			
-		for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
-		{
-			handle[i] = async(std::launch::async, &Image::RunFirstPass, this, i, uiNumThreadsNeeded);
-		}
-
-		RunFirstPass(uiNumThreadsNeeded - 1, uiNumThreadsNeeded);
-
-		for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
-		{
-			handle[i].get();
-		}
-
-		// perform effort-based encoding
-		if (m_fEffort > ETCCOMP_MIN_EFFORT_LEVEL)
-		{
-			unsigned int uiFinishedBlocks = 0;
-			unsigned int uiTotalEffortBlocks = static_cast<unsigned int>(roundf(0.01f * m_fEffort  * GetNumberOfBlocks()));
-
-			if (m_bVerboseOutput)
-			{
-				printf("effortblocks = %d\n", uiTotalEffortBlocks);
-			}
-			unsigned int uiPass = 0;
-			while (1)
-			{
-				if (m_bVerboseOutput)
-				{
-					uiPass++;
-					printf("pass %u\n", uiPass);
-				}
-				m_psortedblocklist->Sort();
-				uiUnfinishedBlocks = m_psortedblocklist->GetNumberOfSortedBlocks();
-				uiFinishedBlocks = GetNumberOfBlocks() - uiUnfinishedBlocks;
-				if (m_bVerboseOutput)
-				{
-					printf("    %u unfinished blocks\n", uiUnfinishedBlocks);
-					// m_psortedblocklist->Print();
-				}
-
-				
-
-				//stop enocding when we did enough to satify the effort percentage
-				if (uiFinishedBlocks >= uiTotalEffortBlocks)
-				{
-					if (m_bVerboseOutput)
-					{
-						printf("Finished %d Blocks out of %d\n", uiFinishedBlocks, uiTotalEffortBlocks);
-					}
-					break;
-				}
-
-				unsigned int uiIteratedBlocks = 0;
-				unsigned int blocksToIterateThisPass = (uiTotalEffortBlocks - uiFinishedBlocks);
-				uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? uiUnfinishedBlocks : a_uiJobs;
-
-				if (uiNumThreadsNeeded <= 1)
-				{
-					//since we already how many blocks each thread will process
-					//cap the thread limit to do the proper amount of work, and not more
-					uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, 0, 1);
-				}
-				else
-				{
-					//we have a lot of work to do, so lets multi thread it
-					std::future<unsigned int> *handleToBlockEncoders = new std::future<unsigned int>[uiNumThreadsNeeded-1];
-
-					for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
-					{
-						handleToBlockEncoders[i] = async(std::launch::async, &Image::IterateThroughWorstBlocks, this, blocksToIterateThisPass, i, uiNumThreadsNeeded);
-					}
-					uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, uiNumThreadsNeeded - 1, uiNumThreadsNeeded);
-
-					for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
-					{
-						uiIteratedBlocks += handleToBlockEncoders[i].get();
-					}
-
-					delete[] handleToBlockEncoders;
-				}
-
-				if (m_bVerboseOutput)
-				{
-					printf("    %u iterated blocks\n", uiIteratedBlocks);
-				}
-			}
-		}
-
-		// generate Etc2-compatible bit-format 4x4 blocks
-		for (int i = 0; i < (int)a_uiJobs - 1; i++)
-		{
-			handle[i] = async(std::launch::async, &Image::SetEncodingBits, this, i, a_uiJobs);
-		}
-		SetEncodingBits(a_uiJobs - 1, a_uiJobs);
-
-		for (int i = 0; i < (int)a_uiJobs - 1; i++)
-		{
-			handle[i].get();
-		}
-
-		auto end = std::chrono::steady_clock::now();
-		std::chrono::milliseconds elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
-		m_iEncodeTime_ms = (int)elapsed.count();
-
-		delete[] handle;
-		delete m_psortedblocklist;
-		return m_encodingStatus;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// iterate the encoding thru the blocks with the worst error
-	// stop when a_uiMaxBlocks blocks have been iterated
-	// split the blocks between the process threads using a_uiMultithreadingOffset and a_uiMultithreadingStride
-	//
-	unsigned int Image::IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks, 
-													unsigned int a_uiMultithreadingOffset, 
-													unsigned int a_uiMultithreadingStride)
-	{
-		assert(a_uiMultithreadingStride > 0);
-		unsigned int uiIteratedBlocks = a_uiMultithreadingOffset;
-
-		SortedBlockList::Link *plink = m_psortedblocklist->GetLinkToFirstBlock();
-		for (plink = plink->Advance(a_uiMultithreadingOffset);
-				plink != nullptr;
-				plink = plink->Advance(a_uiMultithreadingStride) )
-		{
-			if (uiIteratedBlocks >= a_uiMaxBlocks)
-			{
-				break;
-			}
-
-			plink->GetBlock()->PerformEncodingIteration(m_fEffort);
-
-			uiIteratedBlocks += a_uiMultithreadingStride;	
-		}
-
-		return uiIteratedBlocks;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// determine which warnings to check for during Encode() based on encoding format
-	//
-	void Image::FindEncodingWarningTypesForCurFormat()
-	{
-		TrackEncodingWarning(WARNING_ALL_TRANSPARENT_PIXELS);
-		TrackEncodingWarning(WARNING_SOME_RGBA_NOT_0_TO_1);
-		switch (m_format)
-		{
-		case Image::Format::ETC1:
-		case Image::Format::RGB8:
-		case Image::Format::SRGB8:
-			TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
-			break;
-
-		case Image::Format::RGB8A1:
-		case Image::Format::SRGB8A1:
-			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
-			TrackEncodingWarning(WARNING_ALL_OPAQUE_PIXELS);
-			break;
-		case Image::Format::RGBA8:
-		case Image::Format::SRGBA8:
-			TrackEncodingWarning(WARNING_ALL_OPAQUE_PIXELS);
-			break;
-
-		case Image::Format::R11:
-		case Image::Format::SIGNED_R11:
-			TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO);
-			TrackEncodingWarning(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
-			break;
-
-		case Image::Format::RG11:
-		case Image::Format::SIGNED_RG11:
-			TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
-			break;
-		case Image::Format::FORMATS:
-		case Image::Format::UNKNOWN:
-		default:
-			assert(0);
-			break;
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// examine source pixels to check for warnings
-	//
-	void Image::FindAndSetEncodingWarnings()
-	{
-		int numPixels = (m_uiBlockRows * 4) * (m_uiBlockColumns * 4);
-		if (m_iNumOpaquePixels == numPixels)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_ALL_OPAQUE_PIXELS);
-		}
-		if (m_iNumOpaquePixels < numPixels)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_NON_OPAQUE_PIXELS);
-		}
-		if (m_iNumTranslucentPixels > 0)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_TRANSLUCENT_PIXELS);
-		}
-		if (m_iNumTransparentPixels == numPixels)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_ALL_TRANSPARENT_PIXELS);
-		}
-		if (m_numColorValues.fB > 0.0f)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
-		}
-		if (m_numColorValues.fG > 0.0f) 
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO);
-		}
-
-		if (m_numOutOfRangeValues.fR > 0.0f || m_numOutOfRangeValues.fG > 0.0f)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_RGBA_NOT_0_TO_1);
-		}
-		if (m_numOutOfRangeValues.fB > 0.0f || m_numOutOfRangeValues.fA > 0.0f)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_RGBA_NOT_0_TO_1);
-		}
-	}
-	
-	// ----------------------------------------------------------------------------------------------------
-	// return a string name for a given image format
-	//
-	const char * Image::EncodingFormatToString(Image::Format a_format)
-	{
-		switch (a_format)
-		{
-		case Image::Format::ETC1:
-			return "ETC1";
-		case Image::Format::RGB8:
-			return "RGB8";
-		case Image::Format::SRGB8:
-			return "SRGB8";
-
-		case Image::Format::RGB8A1:
-			return "RGB8A1";
-		case Image::Format::SRGB8A1:
-			return "SRGB8A1";
-		case Image::Format::RGBA8:
-			return "RGBA8";
-		case Image::Format::SRGBA8:
-			return "SRGBA8";
-
-		case Image::Format::R11:
-			return "R11";
-		case Image::Format::SIGNED_R11:
-			return "SIGNED_R11";
-
-		case Image::Format::RG11:
-			return "RG11";
-		case Image::Format::SIGNED_RG11:
-			return "SIGNED_RG11";
-		case Image::Format::FORMATS:
-		case Image::Format::UNKNOWN:
-		default:
-			return "UNKNOWN";
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// return a string name for the image's format
-	//
-	const char * Image::EncodingFormatToString(void)
-	{
-		return EncodingFormatToString(m_format);
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// init image blocks prior to encoding
-	// init block sorter for subsequent sortings
-	// check for encoding warnings
-	//
-	void Image::InitBlocksAndBlockSorter(void)
-	{
-		
-		FindEncodingWarningTypesForCurFormat();
-
-		// init each block
-		Block4x4 *pblock = m_pablock;
-		unsigned char *paucEncodingBits = m_paucEncodingBits;
-		for (unsigned int uiBlockRow = 0; uiBlockRow < m_uiBlockRows; uiBlockRow++)
-		{
-			unsigned int uiBlockV = uiBlockRow * 4;
-
-			for (unsigned int uiBlockColumn = 0; uiBlockColumn < m_uiBlockColumns; uiBlockColumn++)
-			{
-				unsigned int uiBlockH = uiBlockColumn * 4;
-
-				pblock->InitFromSource(this, uiBlockH, uiBlockV, paucEncodingBits, m_errormetric);
-
-				paucEncodingBits += Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
-
-				pblock++;
-			}
-		}
-
-		FindAndSetEncodingWarnings();
-
-		// init block sorter
-		{
-			m_psortedblocklist = new SortedBlockList(GetNumberOfBlocks(), 100);
-
-			for (unsigned int uiBlock = 0; uiBlock < GetNumberOfBlocks(); uiBlock++)
-			{
-				pblock = &m_pablock[uiBlock];
-				m_psortedblocklist->AddBlock(pblock);
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// run the first pass of the encoder
-	// the encoder generally finds a reasonable, fast encoding
-	// this is run on all blocks regardless of effort to ensure that all blocks have a valid encoding
-	//
-	void Image::RunFirstPass(unsigned int a_uiMultithreadingOffset, unsigned int a_uiMultithreadingStride)
-	{
-		assert(a_uiMultithreadingStride > 0);
-
-		for (unsigned int uiBlock = a_uiMultithreadingOffset;
-				uiBlock < GetNumberOfBlocks(); 
-				uiBlock += a_uiMultithreadingStride)
-		{
-			Block4x4 *pblock = &m_pablock[uiBlock];
-			pblock->PerformEncodingIteration(m_fEffort);
-		}
-	}
-
-    // ----------------------------------------------------------------------------------------------------
-	// set the encoding bits (for the output file) based on the best encoding for each block
-	//
-	void Image::SetEncodingBits(unsigned int a_uiMultithreadingOffset,
-								unsigned int a_uiMultithreadingStride)
-	{
-		assert(a_uiMultithreadingStride > 0);
-
-		for (unsigned int uiBlock = a_uiMultithreadingOffset; 
-				uiBlock < GetNumberOfBlocks(); 
-				uiBlock += a_uiMultithreadingStride)
-		{
-			Block4x4 *pblock = &m_pablock[uiBlock];
-			pblock->SetEncodingBitsFromEncoding();
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// return the image error
-	// image error is the sum of all block errors
-	//
-	float Image::GetError(void)
-	{
-		float fError = 0.0f;
-
-		for (unsigned int uiBlock = 0; uiBlock < GetNumberOfBlocks(); uiBlock++)
-		{
-			Block4x4 *pblock = &m_pablock[uiBlock];
-			fError += pblock->GetError();
-		}
-
-		return fError;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// determine the encoding bits format based on the encoding format
-	// the encoding bits format is a family of bit encodings that are shared across various encoding formats
-	//
-	Block4x4EncodingBits::Format Image::DetermineEncodingBitsFormat(Format a_format)
-	{
-		Block4x4EncodingBits::Format encodingbitsformat;
-
-		// determine encoding bits format from image format
-		switch (a_format)
-		{
-		case Format::ETC1:
-		case Format::RGB8:
-		case Format::SRGB8:
-			encodingbitsformat = Block4x4EncodingBits::Format::RGB8;
-			break;
-
-		case Format::RGBA8:
-		case Format::SRGBA8:
-			encodingbitsformat = Block4x4EncodingBits::Format::RGBA8;
-			break;
-
-		case Format::R11:
-		case Format::SIGNED_R11:
-			encodingbitsformat = Block4x4EncodingBits::Format::R11;
-			break;
-
-		case Format::RG11:
-		case Format::SIGNED_RG11:
-			encodingbitsformat = Block4x4EncodingBits::Format::RG11;
-			break;
-
-		case Format::RGB8A1:
-		case Format::SRGB8A1:
-			encodingbitsformat = Block4x4EncodingBits::Format::RGB8A1;
-			break;
-
-		default:
-			encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
-			break;
-		}
-
-		return encodingbitsformat;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-}	// namespace Etc
diff --git a/thirdparty/etc2comp/EtcImage.h b/thirdparty/etc2comp/EtcImage.h
deleted file mode 100644
index bd807ac32e..0000000000
--- a/thirdparty/etc2comp/EtcImage.h
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-//#include "Etc.h"
-#include "EtcColorFloatRGBA.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcErrorMetric.h"
-
-
-namespace Etc
-{
-	class Block4x4;
-	class EncoderSpec;
-	class SortedBlockList;
-
-    class Image
-    {
-    public:
-
-		//the differnt warning and errors that can come up during encoding
-		enum  EncodingStatus
-		{
-			SUCCESS = 0,
-			//
-			WARNING_THRESHOLD = 1 << 0,
-			//
-			WARNING_EFFORT_OUT_OF_RANGE = 1 << 1,
-			WARNING_JOBS_OUT_OF_RANGE = 1 << 2,
-			WARNING_SOME_NON_OPAQUE_PIXELS = 1 << 3,//just for opaque formats, etc1, rgb8, r11, rg11
-			WARNING_ALL_OPAQUE_PIXELS = 1 << 4,
-			WARNING_ALL_TRANSPARENT_PIXELS = 1 << 5,
-			WARNING_SOME_TRANSLUCENT_PIXELS = 1 << 6,//just for rgb8A1
-			WARNING_SOME_RGBA_NOT_0_TO_1 = 1 << 7,
-			WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO = 1 << 8,
-			WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO = 1 << 9,
-			//
-			ERROR_THRESHOLD = 1 << 16,
-			//
-			ERROR_UNKNOWN_FORMAT = 1 << 17,
-			ERROR_UNKNOWN_ERROR_METRIC = 1 << 18,
-			ERROR_ZERO_WIDTH_OR_HEIGHT = 1 << 19,
-			//
-		};
-		
-		enum class Format
-		{
-			UNKNOWN,
-			//
-			ETC1,
-			//
-			// ETC2 formats
-			RGB8,
-			SRGB8,
-			RGBA8,
-			SRGBA8,
-			R11,
-			SIGNED_R11,
-			RG11,
-			SIGNED_RG11,
-			RGB8A1,
-			SRGB8A1,
-			//
-			FORMATS,
-			//
-			DEFAULT = SRGB8
-		};
-
-		// constructor using source image
-		Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth,
-				unsigned int a_uiSourceHeight,
-				ErrorMetric a_errormetric);
-
-		// constructor using encoding bits
-		Image(Format a_format, 
-				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
-				unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes,
-				Image *a_pimageSource,
-				ErrorMetric a_errormetric);
-
-		~Image(void);
-
-		EncodingStatus Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, 
-			unsigned int a_uiJobs, unsigned int a_uiMaxJobs);
-
-		inline void AddToEncodingStatus(EncodingStatus a_encStatus)
-		{
-			m_encodingStatus = (EncodingStatus)((unsigned int)m_encodingStatus | (unsigned int)a_encStatus);
-		}
-		
-		inline unsigned int GetSourceWidth(void)
-		{
-			return m_uiSourceWidth;
-		}
-
-		inline unsigned int GetSourceHeight(void)
-		{
-			return m_uiSourceHeight;
-		}
-
-		inline unsigned int GetExtendedWidth(void)
-		{
-			return m_uiExtendedWidth;
-		}
-
-		inline unsigned int GetExtendedHeight(void)
-		{
-			return m_uiExtendedHeight;
-		}
-
-		inline unsigned int GetNumberOfBlocks()
-		{
-			return m_uiBlockColumns * m_uiBlockRows;
-		}
-
-		inline Block4x4 * GetBlocks()
-		{
-			return m_pablock;
-		}
-
-		inline unsigned char * GetEncodingBits(void)
-		{
-			return m_paucEncodingBits;
-		}
-
-		inline unsigned int GetEncodingBitsBytes(void)
-		{
-			return m_uiEncodingBitsBytes;
-		}
-
-		inline int GetEncodingTimeMs(void)
-		{
-			return m_iEncodeTime_ms;
-		}
-
-		float GetError(void);
-
-		inline ColorFloatRGBA * GetSourcePixel(unsigned int a_uiH, unsigned int a_uiV)
-		{
-			if (a_uiH >= m_uiSourceWidth || a_uiV >= m_uiSourceHeight)
-			{
-				return nullptr;
-			}
-
-			return &m_pafrgbaSource[a_uiV*m_uiSourceWidth + a_uiH];
-		}
-
-		inline Format GetFormat(void)
-		{
-			return m_format;
-		}
-
-		static Block4x4EncodingBits::Format DetermineEncodingBitsFormat(Format a_format);
-
-		inline static unsigned short CalcExtendedDimension(unsigned short a_ushOriginalDimension)
-		{
-			return (unsigned short)((a_ushOriginalDimension + 3) & ~3);
-		}
-
-		inline ErrorMetric GetErrorMetric(void)
-		{
-			return m_errormetric;
-		}
-
-		static const char * EncodingFormatToString(Image::Format a_format);
-		const char * EncodingFormatToString(void);
-		//used to get basic information about the image data
-		int m_iNumOpaquePixels;
-		int m_iNumTranslucentPixels;
-		int m_iNumTransparentPixels;
-
-		ColorFloatRGBA m_numColorValues;
-		ColorFloatRGBA m_numOutOfRangeValues;
-
-		bool m_bVerboseOutput;
-	private:
-		//add a warning or error to check for while encoding
-		inline void TrackEncodingWarning(EncodingStatus a_encStatus)
-		{
-			m_warningsToCapture = (EncodingStatus)((unsigned int)m_warningsToCapture | (unsigned int)a_encStatus);
-		}
-
-		//report the warning if it is something we care about for this encoding
-		inline void AddToEncodingStatusIfSignfigant(EncodingStatus a_encStatus)
-		{
-			if ((EncodingStatus)((unsigned int)m_warningsToCapture & (unsigned int)a_encStatus) == a_encStatus)
-			{
-				AddToEncodingStatus(a_encStatus);
-			}
-		}
-
-		Image(void);
-		void FindEncodingWarningTypesForCurFormat();
-		void FindAndSetEncodingWarnings();
-
-		void InitBlocksAndBlockSorter(void);
-
-		void RunFirstPass(unsigned int a_uiMultithreadingOffset, 
-							unsigned int a_uiMultithreadingStride);
-
-		void SetEncodingBits(unsigned int a_uiMultithreadingOffset,
-								unsigned int a_uiMultithreadingStride);
-
-		unsigned int IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks,
-												unsigned int a_uiMultithreadingOffset,
-												unsigned int a_uiMultithreadingStride);
-
-		// inputs
-		ColorFloatRGBA *m_pafrgbaSource;
-		unsigned int m_uiSourceWidth;
-		unsigned int m_uiSourceHeight;
-		unsigned int m_uiExtendedWidth;
-		unsigned int m_uiExtendedHeight;
-		unsigned int m_uiBlockColumns;
-		unsigned int m_uiBlockRows;
-		// intermediate data
-		Block4x4 *m_pablock;
-		// encoding
-		Format m_format;
-		Block4x4EncodingBits::Format m_encodingbitsformat;
-		unsigned int m_uiEncodingBitsBytes;		// for entire image
-		unsigned char *m_paucEncodingBits;
-		ErrorMetric m_errormetric;
-		float m_fEffort;
-		// stats
-		int m_iEncodeTime_ms;
-		
-		SortedBlockList *m_psortedblocklist;
-		//this will hold any warning or errors that happen during encoding
-		EncodingStatus m_encodingStatus;
-		//these will be the warnings we are tracking
-		EncodingStatus m_warningsToCapture;
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcIndividualTrys.cpp b/thirdparty/etc2comp/EtcIndividualTrys.cpp
deleted file mode 100644
index 56ff4c65ec..0000000000
--- a/thirdparty/etc2comp/EtcIndividualTrys.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcIndividualTrys.cpp
-
-Gathers the results of the various encoding trys for both halves of a 4x4 block for Individual mode
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcIndividualTrys.h"
-
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// construct a list of trys (encoding attempts)
-	//
-	// a_frgbaColor1 is the basecolor for the first half
-	// a_frgbaColor2 is the basecolor for the second half
-	// a_pauiPixelMapping1 is the pixel order for the first half
-	// a_pauiPixelMapping2 is the pixel order for the second half
-	// a_uiRadius is the amount to vary the base colors
-	//
-	IndividualTrys::IndividualTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2,
-									const unsigned int *a_pauiPixelMapping1,
-									const unsigned int *a_pauiPixelMapping2,
-									unsigned int a_uiRadius)
-	{
-		assert(a_uiRadius <= MAX_RADIUS);
-
-		ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR4G4B4();
-		ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR4G4B4();
-
-		// quantize base colors
-		// ensure that trys with a_uiRadius don't overflow
-		int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(15.0f), a_uiRadius);
-		int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(15.0f), a_uiRadius);
-		int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(15.0f), a_uiRadius);
-		int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(15.0f), a_uiRadius);
-		int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(15.0f), a_uiRadius);
-		int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(15.0f), a_uiRadius);
-
-		m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius);
-		m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	void IndividualTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue,
-									const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius)
-	{
-
-		m_iRed = a_iRed;
-		m_iGreen = a_iGreen;
-		m_iBlue = a_iBlue;
-
-		m_pauiPixelMapping = a_pauiPixelMapping;
-		m_uiRadius = a_uiRadius;
-
-		m_uiTrys = 0;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcIndividualTrys.h b/thirdparty/etc2comp/EtcIndividualTrys.h
deleted file mode 100644
index 5fb12fbcf4..0000000000
--- a/thirdparty/etc2comp/EtcIndividualTrys.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColorFloatRGBA.h"
-
-namespace Etc
-{
-
-	class IndividualTrys
-	{
-	public:
-
-		static const unsigned int MAX_RADIUS = 1;
-
-		IndividualTrys(ColorFloatRGBA a_frgbaColor1,
-						ColorFloatRGBA a_frgbaColor2,
-						const unsigned int *a_pauiPixelMapping1,
-						const unsigned int *a_pauiPixelMapping2,
-						unsigned int a_uiRadius);
-
-		inline static int MoveAwayFromEdge(int a_i, int a_iDistance)
-		{
-			if (a_i < (0+ a_iDistance))
-			{
-				return (0 + a_iDistance);
-			}
-			else if (a_i > (15- a_iDistance))
-			{
-				return (15 - a_iDistance);
-			}
-
-			return a_i;
-		}
-
-		class Try
-		{
-        public :
-			static const unsigned int SELECTORS = 8;	// per half
-
-			int m_iRed;
-			int m_iGreen;
-			int m_iBlue;
-			unsigned int m_uiCW;
-			unsigned int m_auiSelectors[SELECTORS];
-			float m_fError;
-        };
-
-		class Half
-		{
-		public:
-
-			static const unsigned int MAX_TRYS = 27;
-
-			void Init(int a_iRed, int a_iGreen, int a_iBlue, 
-						const unsigned int *a_pauiPixelMapping,
-						unsigned int a_uiRadius);
-
-			// center of trys
-			int m_iRed;
-			int m_iGreen;
-			int m_iBlue;
-
-			const unsigned int *m_pauiPixelMapping;
-			unsigned int m_uiRadius;
-
-			unsigned int m_uiTrys;
-			Try m_atry[MAX_TRYS];
-
-			Try *m_ptryBest;
-		};
-
-		Half m_half1;
-		Half m_half2;
-
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcMath.cpp b/thirdparty/etc2comp/EtcMath.cpp
deleted file mode 100644
index 096d5f7ab9..0000000000
--- a/thirdparty/etc2comp/EtcMath.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "EtcConfig.h"
-#include "EtcMath.h"
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate the line that best fits the set of XY points contained in a_afX[] and a_afY[]
-	// use a_fSlope and a_fOffset to define that line
-	//
-	bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
-					float *a_fSlope, float *a_fOffset)
-	{
-		float fPoints = (float)a_Points;
-
-		float fSumX = 0.0f;
-		float fSumY = 0.0f;
-		float fSumXY = 0.0f;
-		float fSumX2 = 0.0f;
-
-		for (unsigned int uiPoint = 0; uiPoint < a_Points; uiPoint++)
-		{
-			fSumX += a_afX[uiPoint];
-			fSumY += a_afY[uiPoint];
-			fSumXY += a_afX[uiPoint] * a_afY[uiPoint];
-			fSumX2 += a_afX[uiPoint] * a_afX[uiPoint];
-		}
-
-		float fDivisor = fPoints*fSumX2 - fSumX*fSumX;
-
-		// if vertical line
-		if (fDivisor == 0.0f)
-		{
-			*a_fSlope = 0.0f;
-			*a_fOffset = 0.0f;
-			return true;
-		}
-
-		*a_fSlope = (fPoints*fSumXY - fSumX*fSumY) / fDivisor;
-		*a_fOffset = (fSumY - (*a_fSlope)*fSumX) / fPoints;
-
-		return false;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcMath.h b/thirdparty/etc2comp/EtcMath.h
deleted file mode 100644
index c58c9a91bc..0000000000
--- a/thirdparty/etc2comp/EtcMath.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <math.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// return true if vertical line
-	bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
-					float *a_fSlope, float *a_fOffset);
-
-	inline float ConvertMSEToPSNR(float a_fMSE)
-	{
-		if (a_fMSE == 0.0f)
-		{
-			return INFINITY;
-		}
-
-		return 10.0f * log10f(1.0f / a_fMSE);
-	}
-
-
-}
diff --git a/thirdparty/etc2comp/EtcSortedBlockList.cpp b/thirdparty/etc2comp/EtcSortedBlockList.cpp
deleted file mode 100644
index bfa6b7b3fa..0000000000
--- a/thirdparty/etc2comp/EtcSortedBlockList.cpp
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcSortedBlockList.cpp
-
-SortedBlockList is a list of 4x4 blocks that can be used by the "effort" system to prioritize
-the encoding of the 4x4 blocks.
-
-The sorting is done with buckets, where each bucket is an indication of how much error each 4x4 block has
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcSortedBlockList.h"
-
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// construct an empty list
-	//
-	// allocate enough memory to add all of the image's 4x4 blocks later
-	// allocate enough buckets to sort the blocks
-	//
-	SortedBlockList::SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets)
-	{
-		m_uiImageBlocks = a_uiImageBlocks;
-		m_iBuckets = (int)a_uiBuckets;
-
-		m_uiAddedBlocks = 0;
-		m_uiSortedBlocks = 0;
-		m_palinkPool = new Link[m_uiImageBlocks];
-		m_pabucket = new Bucket[m_iBuckets];
-		m_fMaxError = 0.0f;
-
-		InitBuckets();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	SortedBlockList::~SortedBlockList(void)
-	{
-		delete[] m_palinkPool;
-		delete[] m_pabucket;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-    // add a 4x4 block to the list
-	// the 4x4 block will be sorted later
-	//
-    void SortedBlockList::AddBlock(Block4x4 *a_pblock)
-    {
-        assert(m_uiAddedBlocks < m_uiImageBlocks);
-        Link *plink = &m_palinkPool[m_uiAddedBlocks++];
-		plink->Init(a_pblock);
-    }
-
-	// ----------------------------------------------------------------------------------------------------
-	// sort all of the 4x4 blocks that have been added to the list
-	//
-	// first, determine the maximum error, then assign an error range to each bucket
-	// next, determine which bucket each 4x4 block belongs to based on the 4x4 block's error
-	// add the 4x4 block to the appropriate bucket
-	// lastly, walk thru the buckets and add each bucket to a sorted linked list
-	//
-	// the resultant sorting is an approximate sorting from most to least error
-	//
-    void SortedBlockList::Sort(void)
-    {
-		assert(m_uiAddedBlocks == m_uiImageBlocks);
-        InitBuckets();
-
-        // find max block error
-        m_fMaxError = -1.0f;
-
-        for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++)
-        {
-            Link *plinkBlock = &m_palinkPool[uiLink];
-
-            float fBlockError = plinkBlock->GetBlock()->GetError();
-            if (fBlockError > m_fMaxError)
-            {
-                m_fMaxError = fBlockError;
-            }
-        }
-        // prevent divide by zero or divide by negative
-        if (m_fMaxError <= 0.0f)
-        {
-            m_fMaxError = 1.0f;
-        }
-		//used for debugging
-		//int numDone = 0;
-        // put all of the blocks with unfinished encodings into the appropriate bucket
-		m_uiSortedBlocks = 0;
-        for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++)
-        {
-            Link *plinkBlock = &m_palinkPool[uiLink];
-
-			// if the encoding is done, don't add it to the list
-			if (plinkBlock->GetBlock()->GetEncoding()->IsDone())
-			{
-				//numDone++;
-				continue;
-			}
-
-            // calculate the appropriate sort bucket
-            float fBlockError = plinkBlock->GetBlock()->GetError();
-            int iBucket = (int) floorf(m_iBuckets * fBlockError / m_fMaxError);
-            // clamp to bucket index
-            iBucket = iBucket < 0 ? 0 : iBucket >= m_iBuckets ? m_iBuckets - 1 : iBucket;
-
-            // add block to bucket
-			{
-				Bucket *pbucket = &m_pabucket[iBucket];
-				if (pbucket->plinkLast)
-				{
-					pbucket->plinkLast->SetNext(plinkBlock);
-					pbucket->plinkLast = plinkBlock;
-				}
-				else
-				{
-					pbucket->plinkFirst = pbucket->plinkLast = plinkBlock;
-				}
-				plinkBlock->SetNext(nullptr);
-			}
-
-			m_uiSortedBlocks++;
-
-            if (0)
-            {
-                printf("%u: e=%.3f\n", uiLink, fBlockError);
-                Print();
-                printf("\n\n\n");
-            }
-        }
-		//printf("num blocks already done: %d\n",numDone);
-		//link the blocks together across buckets
-		m_plinkFirst = nullptr;
-		m_plinkLast = nullptr;
-		for (int iBucket = m_iBuckets - 1; iBucket >= 0; iBucket--)
-		{
-			Bucket *pbucket = &m_pabucket[iBucket];
-
-			if (pbucket->plinkFirst)
-			{
-				if (m_plinkFirst == nullptr)
-				{
-					m_plinkFirst = pbucket->plinkFirst;
-				}
-				else
-				{
-					assert(pbucket->plinkLast->GetNext() == nullptr);
-					m_plinkLast->SetNext(pbucket->plinkFirst);
-				}
-
-				m_plinkLast = pbucket->plinkLast;
-			}
-		}
-
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// clear all of the buckets.  normally done in preparation for a sort
-	//
-	void SortedBlockList::InitBuckets(void)
-    {
-        for (int iBucket = 0; iBucket < m_iBuckets; iBucket++)
-        {
-            Bucket *pbucket = &m_pabucket[iBucket];
-
-            pbucket->plinkFirst = 0;
-            pbucket->plinkLast = 0;
-        }
-    }
-
-    // ----------------------------------------------------------------------------------------------------
-    // print out the list of sorted 4x4 blocks
-	// normally used for debugging
-	//
-    void SortedBlockList::Print(void)
-    {
-        for (int iBucket = m_iBuckets-1; iBucket >= 0; iBucket--)
-        {
-            Bucket *pbucket = &m_pabucket[iBucket];
-
-            unsigned int uiBlocks = 0;
-            for (Link *plink = pbucket->plinkFirst; plink != nullptr; plink = plink->GetNext() )
-            {
-                uiBlocks++;
-
-				if (plink == pbucket->plinkLast)
-				{
-					break;
-				}
-            }
-
-            float fBucketError = m_fMaxError * iBucket / m_iBuckets;
-            float fBucketRMS = sqrtf(fBucketError / (4.0f*16.0f) );
-            printf("%3d: e=%.3f rms=%.6f %u\n", iBucket, fBucketError, fBucketRMS, uiBlocks);
-        }
-    }
-
-    // ----------------------------------------------------------------------------------------------------
-    //
-
-}   // namespace Etc
diff --git a/thirdparty/etc2comp/EtcSortedBlockList.h b/thirdparty/etc2comp/EtcSortedBlockList.h
deleted file mode 100644
index 960e8adc34..0000000000
--- a/thirdparty/etc2comp/EtcSortedBlockList.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-namespace Etc
-{
-	class Block4x4;
-
-    class SortedBlockList
-    {
-    public:
-
-		class Link
-		{
-		public:
-
-			inline void Init(Block4x4 *a_pblock)
-			{
-				m_pblock = a_pblock;
-				m_plinkNext = nullptr;
-			}
-
-			inline Block4x4 * GetBlock(void)
-			{
-				return m_pblock;
-			}
-
-			inline void SetNext(Link *a_plinkNext)
-			{
-				m_plinkNext = a_plinkNext;
-			}
-
-			inline Link * GetNext(void)
-			{
-				return m_plinkNext;
-			}
-
-			inline Link * Advance(unsigned int a_uiSteps = 1)
-			{
-				Link *plink = this;
-
-				for (unsigned int uiStep = 0; uiStep < a_uiSteps; uiStep++)
-				{
-					if (plink == nullptr)
-					{
-						break;
-					}
-
-					plink = plink->m_plinkNext;
-				}
-
-				return plink;
-			}
-
-		private:
-
-			Block4x4 *m_pblock;
-			Link *m_plinkNext;
-		};
-
-		SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets);
-		~SortedBlockList(void);
-
-        void AddBlock(Block4x4 *a_pblock);
-
-        void Sort(void);
-
-		inline Link * GetLinkToFirstBlock(void)
-		{
-			return m_plinkFirst;
-		}
-
-		inline unsigned int GetNumberOfAddedBlocks(void)
-		{
-			return m_uiAddedBlocks;
-		}
-
-		inline unsigned int GetNumberOfSortedBlocks(void)
-		{
-			return m_uiSortedBlocks;
-		}
-
-		void Print(void);
-
-	private:
-
-        void InitBuckets(void);
-
-        class Bucket
-        {
-        public:
-            Link *plinkFirst;
-            Link *plinkLast;
-        };
-
-        unsigned int m_uiImageBlocks;
-        int m_iBuckets;
-
-		unsigned int m_uiAddedBlocks;
-		unsigned int m_uiSortedBlocks;
-		Link *m_palinkPool;
-        Bucket *m_pabucket;
-        float m_fMaxError;
-
-		Link *m_plinkFirst;
-		Link *m_plinkLast;
-
-    };
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/LICENSE b/thirdparty/etc2comp/LICENSE
deleted file mode 100644
index d645695673..0000000000
--- a/thirdparty/etc2comp/LICENSE
+++ /dev/null
@@ -1,202 +0,0 @@
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/thirdparty/etc2comp/README.md b/thirdparty/etc2comp/README.md
deleted file mode 100644
index 2f4363d042..0000000000
--- a/thirdparty/etc2comp/README.md
+++ /dev/null
@@ -1,197 +0,0 @@
-# Etc2Comp - Texture to ETC2 compressor
-
-Etc2Comp is a command line tool that converts textures (e.g. bitmaps)
-into the [ETC2](https://en.wikipedia.org/wiki/Ericsson_Texture_Compression)
-format. The tool is built with a focus on encoding performance
-to reduce the amount of time required to compile asset heavy applications as
-well as reduce overall application size.
-
-This repo provides source code that can be compiled into a binary. The
-binary can then be used to convert textures to the ETC2 format.
-
-Important: This is not an official Google product. It is an experimental
-library published as-is. Please see the CONTRIBUTORS.md file for information
-about questions or issues.
-
-## Setup
-This project uses [CMake](https://cmake.org/) to generate platform-specific
-build files:
- - Linux: make files
- - OS X: Xcode workspace files
- - Microsoft Windows: Visual Studio solution files
- - Note: CMake supports other formats, but this doc only provides steps for
- one of each platform for brevity.
-
-Refer to each platform's setup section to setup your environment and build
-an Etc2Comp binary. Then skip to the usage section of this page for examples
-of how to use the library.
-
-### Setup for OS X
- build tested on this config:
-  OS X 10.9.5 i7 16GB RAM
-  Xcode 5.1.1
-  cmake 3.2.3
-  
-Start by downloading and installing the following components if they are not
-already installed on your development machine.
- - *Xcode* version 5.1.1, or greater
- - [CMake](https://cmake.org/download/) version 3.2.3, or greater
-
-To build the Etc2Comp binary:
- 1. Open a *Terminal* window and navigate to the project directory.
- 1. Run `mkdir build_xcode`
- 1. Run `cd build_xcode`
- 1. Run `cmake -G Xcode ../`
- 1. Open *Xcode* and import the `build_xcode/EtcTest.xcodeproj` file.
- 1. Open the Product menu and choose Build For -> Running.
- 1. Once the build succeeds the binary located at `build_xcode/EtcTool/Debug/EtcTool`
-can be executed.
-
-Optional
-Xcode EtcTool ‘Run’ preferences
-note: if the build_xcode/EtcTest.xcodeproj is manually deleted then some Xcode preferences 
-will need to be set by hand after cmake is run (these prefs are retained across 
-cmake updates if the .xcodeproj is not deleted/removed)
-
-1. Set the active scheme to ‘EtcTool’
-1. Edit the scheme
-1. Select option ‘Run EtcTool’, then tab ‘Arguments’. 
-Add this launch argument: ‘-argfile ../../EtcTool/args.txt’
-1. Select tab ‘Options’ and set a custom working directory to: ‘$(SRCROOT)/Build_Xcode/EtcTool’
-
-### SetUp for Windows
-
-1. Open a *Terminal* window and navigate to the project directory.
-1. Run `mkdir build_vs`
-1. Run `cd build_vs`
-1. Run CMAKE, noting what build version you need, and pointing to the parent directory as the source root; 
-  For VS 2013 : `cmake -G "Visual Studio 12 2013 Win64" ../`
-  For VS 2015 : `cmake -G "Visual Studio 14 2015 Win64" ../`
-  NOTE: To see what supported Visual Studio outputs there are, run `cmake -G`
-1. open the 'EtcTest' solution
-1. make the 'EtcTool' project the start up project 
-1. (optional) in the project properties, under 'Debugging ->command arguments' 
-add the argfile textfile thats included in the EtcTool directory. 
-example: -argfile C:\etc2\EtcTool\Args.txt
-
-### Setup For Linux
-The Linux build was tested on this config:
-  Ubuntu desktop 14.04
-  gcc/g++ 4.8
-  cmake 2.8.12.2
-
-1. Verify linux has cmake and C++-11 capable g++ installed
-1. Open shell
-1. Run `mkdir build_linux`
-1. Run `cd build_linux`
-1. Run `cmake ../`
-1. Run `make`
-1. navigate to the newly created EtcTool directory `cd EtcTool`
-1. run the executable: `./EtcTool -argfile ../../EtcTool/args.txt`
-
-Skip to the <a href="#usage">Usage</a> section for more information about using the
-tool.
-
-## Usage
-
-### Command Line Usage
-EtcTool can be run from the command line with the following usage:
-    etctool.exe source_image [options ...] -output encoded_image
-
-The encoder will use an array of RGBA floats read from the source_image to create 
-an ETC1 or ETC2 encoded image in encoded_image.  The RGBA floats should be in the 
-range [0:1].
-
-Options:
-
-    -analyze <analysis_folder>
-    -argfile <arg_file>           additional command line arguments read from a file
-    -blockAtHV <H V>              encodes a single block that contains the
-                                  pixel specified by the H V coordinates
-    -compare <comparison_image>   compares source_image to comparison_image
-    -effort <amount>              number between 0 and 100 to specify the encoding quality 
-                                  (100 is the highest quality)
-    -errormetric <error_metric>   specify the error metric, the options are
-                                  rgba, rgbx, rec709, numeric and normalxyz
-    -format <etc_format>          ETC1, RGB8, SRGB8, RGBA8, SRGB8, RGB8A1,
-                                  SRGB8A1 or R11
-    -help                         prints this message
-    -jobs or -j <thread_count>    specifies the number of threads (default=1)
-    -normalizexyz                 normalize RGB to have a length of 1
-    -verbose or -v                shows status information during the encoding
-                                  process
-	-mipmaps or -m <mip_count>    sets the maximum number of mipaps to generate (default=1)
-	-mipwrap or -w <x|y|xy>       sets the mipmap filter wrap mode (default=clamp)
-
-* -analyze will run an analysis of the encoding and place it in folder 
-"analysis_folder" (e.g. ../analysis/kodim05).  within the analysis_folder, a folder 
-will be created with a name of the current date/time (e.g. 20151204_153306).  this 
-date/time folder is used to compare encodings of the same texture over time.  
-within the date/time folder is a text file with several encoding stats and a 2x png 
-image showing the encoding mode for each 4x4 block.
-
-* -argfile allows additional command line arguments to be placed in a text file
-
-* -blockAtHV selects the 4x4 pixel subset of the source image at position (H,V).  
-This is mainly used for debugging
-
-* -compare compares the source image to the created encoded image. The encoding
-will dictate what error analysis is used in the comparison.
-
-* -effort uses an "amount" between 0 and 100 to determine how much additional effort 
-to apply during the encoding.
-
-* -errormetric selects the fitting algorithm used by the encoder.  "rgba" calculates 
-RMS error using RGB components that are weighted by A.  "rgbx" calculates RMS error 
-using RGBA components, where A is treated as an additional data channel, instead of 
-as alpha.  "rec709" is similar to "rgba", except the RGB components are also weighted 
-according to Rec709.  "numeric" calculates RMS error using unweighted RGBA components.  
-"normalize" calculates error based on dot product and vector length for RGB and RMS 
-error for A.
-
-* -help prints out the usage message
-
-* -jobs enables multi-threading to speed up image encoding
-
-* -normalizexyz normalizes the source RGB to have a length of 1.
-
-* -verbose shows information on the current encoding process. It will then display the 
-PSNR and time time it took to encode the image.
-
-* -mipmaps takes an argument that specifies how many mipmaps to generate from the 
-source image.  The mipmaps are generated with a lanczos3 filter using edge clamping.
-If the mipmaps option is not specified no mipmaps are created.
-
-* -mipwrap takes an argument that specifies the mipmap filter wrap mode.  The options 
-are "x", "y" and "xy" which specify wrapping in x only, y only or x and y respectively.
-The default options are clamping in both x and y.
-
-Note: Path names can use slashes or backslashes.  The tool will convert the 
-slashes to the appropriate polarity for the current platform.
-
-
-## API
-
-The library supports two different APIs - a C-like API that is not heavily 
-class-based and a class-based API.
-
-main() in EtcTool.cpp contains an example of both APIs.
-
-The Encode() method now returns an EncodingStatus that contains bit flags for
-reporting various warnings and flags encountered when encoding.
-
-
-## Copyright
-Copyright 2015 Etc2Comp Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/thirdparty/etc2comp/patches/fix-rgba8-max-channels.patch b/thirdparty/etc2comp/patches/fix-rgba8-max-channels.patch
deleted file mode 100644
index ea9b5640b6..0000000000
--- a/thirdparty/etc2comp/patches/fix-rgba8-max-channels.patch
+++ /dev/null
@@ -1,224 +0,0 @@
-diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
-index 5656556db9..5c7ebed788 100644
---- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
-+++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
-@@ -508,7 +508,7 @@ namespace Etc
- 		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
- 		if (iMaxRed1 > 15)
- 		{
--			iMinRed1 = 15;
-+			iMaxRed1 = 15;
- 		}
- 
- 		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-@@ -519,7 +519,7 @@ namespace Etc
- 		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
- 		if (iMaxGreen1 > 15)
- 		{
--			iMinGreen1 = 15;
-+			iMaxGreen1 = 15;
- 		}
- 
- 		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-@@ -530,7 +530,7 @@ namespace Etc
- 		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
- 		if (iMaxBlue1 > 15)
- 		{
--			iMinBlue1 = 15;
-+			iMaxBlue1 = 15;
- 		}
- 
- 		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-@@ -545,7 +545,7 @@ namespace Etc
- 		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
- 		if (iMaxRed2 > 15)
- 		{
--			iMinRed2 = 15;
-+			iMaxRed2 = 15;
- 		}
- 
- 		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-@@ -556,7 +556,7 @@ namespace Etc
- 		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
- 		if (iMaxGreen2 > 15)
- 		{
--			iMinGreen2 = 15;
-+			iMaxGreen2 = 15;
- 		}
- 
- 		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-@@ -567,7 +567,7 @@ namespace Etc
- 		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
- 		if (iMaxBlue2 > 15)
- 		{
--			iMinBlue2 = 15;
-+			iMaxBlue2 = 15;
- 		}
- 
- 		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-@@ -761,7 +761,7 @@ namespace Etc
- 		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
- 		if (iMaxRed1 > 15)
- 		{
--			iMinRed1 = 15;
-+			iMaxRed1 = 15;
- 		}
- 
- 		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-@@ -772,7 +772,7 @@ namespace Etc
- 		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
- 		if (iMaxGreen1 > 15)
- 		{
--			iMinGreen1 = 15;
-+			iMaxGreen1 = 15;
- 		}
- 
- 		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-@@ -783,7 +783,7 @@ namespace Etc
- 		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
- 		if (iMaxBlue1 > 15)
- 		{
--			iMinBlue1 = 15;
-+			iMaxBlue1 = 15;
- 		}
- 
- 		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-@@ -798,7 +798,7 @@ namespace Etc
- 		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
- 		if (iMaxRed2 > 15)
- 		{
--			iMinRed2 = 15;
-+			iMaxRed2 = 15;
- 		}
- 
- 		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-@@ -809,7 +809,7 @@ namespace Etc
- 		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
- 		if (iMaxGreen2 > 15)
- 		{
--			iMinGreen2 = 15;
-+			iMaxGreen2 = 15;
- 		}
- 
- 		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-@@ -820,7 +820,7 @@ namespace Etc
- 		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
- 		if (iMaxBlue2 > 15)
- 		{
--			iMinBlue2 = 15;
-+			iMaxBlue2 = 15;
- 		}
- 
- 		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
-index ba2b42fb05..b94b64e68c 100644
---- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
-+++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
-@@ -847,7 +847,7 @@ namespace Etc
- 		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
- 		if (iMaxRed1 > 15)
- 		{
--			iMinRed1 = 15;
-+			iMaxRed1 = 15;
- 		}
- 
- 		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-@@ -858,7 +858,7 @@ namespace Etc
- 		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
- 		if (iMaxGreen1 > 15)
- 		{
--			iMinGreen1 = 15;
-+			iMaxGreen1 = 15;
- 		}
- 
- 		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-@@ -869,7 +869,7 @@ namespace Etc
- 		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
- 		if (iMaxBlue1 > 15)
- 		{
--			iMinBlue1 = 15;
-+			iMaxBlue1 = 15;
- 		}
- 
- 		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-@@ -884,7 +884,7 @@ namespace Etc
- 		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
- 		if (iMaxRed2 > 15)
- 		{
--			iMinRed2 = 15;
-+			iMaxRed2 = 15;
- 		}
- 
- 		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-@@ -895,7 +895,7 @@ namespace Etc
- 		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
- 		if (iMaxGreen2 > 15)
- 		{
--			iMinGreen2 = 15;
-+			iMaxGreen2 = 15;
- 		}
- 
- 		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-@@ -906,7 +906,7 @@ namespace Etc
- 		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
- 		if (iMaxBlue2 > 15)
- 		{
--			iMinBlue2 = 15;
-+			iMaxBlue2 = 15;
- 		}
- 
- 		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-@@ -1108,7 +1108,7 @@ namespace Etc
- 		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
- 		if (iMaxRed1 > 15)
- 		{
--			iMinRed1 = 15;
-+			iMaxRed1 = 15;
- 		}
- 
- 		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-@@ -1119,7 +1119,7 @@ namespace Etc
- 		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
- 		if (iMaxGreen1 > 15)
- 		{
--			iMinGreen1 = 15;
-+			iMaxGreen1 = 15;
- 		}
- 
- 		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-@@ -1130,7 +1130,7 @@ namespace Etc
- 		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
- 		if (iMaxBlue1 > 15)
- 		{
--			iMinBlue1 = 15;
-+			iMaxBlue1 = 15;
- 		}
- 
- 		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-@@ -1145,7 +1145,7 @@ namespace Etc
- 		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
- 		if (iMaxRed2 > 15)
- 		{
--			iMinRed2 = 15;
-+			iMaxRed2 = 15;
- 		}
- 
- 		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-@@ -1156,7 +1156,7 @@ namespace Etc
- 		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
- 		if (iMaxGreen2 > 15)
- 		{
--			iMinGreen2 = 15;
-+			iMaxGreen2 = 15;
- 		}
- 
- 		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-@@ -1167,7 +1167,7 @@ namespace Etc
- 		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
- 		if (iMaxBlue2 > 15)
- 		{
--			iMinBlue2 = 15;
-+			iMaxBlue2 = 15;
- 		}
- 
- 		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
diff --git a/thirdparty/etcpak/AUTHORS.txt b/thirdparty/etcpak/AUTHORS.txt
new file mode 100644
index 0000000000..e7bae62c85
--- /dev/null
+++ b/thirdparty/etcpak/AUTHORS.txt
@@ -0,0 +1,3 @@
+Bartosz Taudul <wolf@nereid.pl>
+Daniel Jungmann <el.3d.source@gmail.com>
+Florian Penzkofer <fp@nullptr.de>
diff --git a/thirdparty/etcpak/Bitmap.cpp b/thirdparty/etcpak/Bitmap.cpp
new file mode 100644
index 0000000000..ef318318ac
--- /dev/null
+++ b/thirdparty/etcpak/Bitmap.cpp
@@ -0,0 +1,216 @@
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <png.h>
+#include "lz4/lz4.h"
+
+#include "Bitmap.hpp"
+#include "Debug.hpp"
+
+Bitmap::Bitmap( const char* fn, unsigned int lines, bool bgr )
+    : m_block( nullptr )
+    , m_lines( lines )
+    , m_alpha( true )
+    , m_sema( 0 )
+{
+    FILE* f = fopen( fn, "rb" );
+    assert( f );
+
+    char buf[4];
+    fread( buf, 1, 4, f );
+    if( memcmp( buf, "raw4", 4 ) == 0 )
+    {
+        uint8_t a;
+        fread( &a, 1, 1, f );
+        m_alpha = a == 1;
+        uint32_t d;
+        fread( &d, 1, 4, f );
+        m_size.x = d;
+        fread( &d, 1, 4, f );
+        m_size.y = d;
+        DBGPRINT( "Raw bitmap " << fn << "  " << m_size.x << "x" << m_size.y );
+
+        assert( m_size.x % 4 == 0 );
+        assert( m_size.y % 4 == 0 );
+
+        int32_t csize;
+        fread( &csize, 1, 4, f );
+        char* cbuf = new char[csize];
+        fread( cbuf, 1, csize, f );
+        fclose( f );
+
+        m_block = m_data = new uint32_t[m_size.x*m_size.y];
+        m_linesLeft = m_size.y / 4;
+
+        LZ4_decompress_fast( cbuf, (char*)m_data, m_size.x*m_size.y*4 );
+        delete[] cbuf;
+
+        for( int i=0; i<m_size.y/4; i++ )
+        {
+            m_sema.unlock();
+        }
+    }
+    else
+    {
+        fseek( f, 0, SEEK_SET );
+
+        unsigned int sig_read = 0;
+        int bit_depth, color_type, interlace_type;
+
+        png_structp png_ptr = png_create_read_struct( PNG_LIBPNG_VER_STRING, NULL, NULL, NULL );
+        png_infop info_ptr = png_create_info_struct( png_ptr );
+        setjmp( png_jmpbuf( png_ptr ) );
+
+        png_init_io( png_ptr, f );
+        png_set_sig_bytes( png_ptr, sig_read );
+
+        png_uint_32 w, h;
+
+        png_read_info( png_ptr, info_ptr );
+        png_get_IHDR( png_ptr, info_ptr, &w, &h, &bit_depth, &color_type, &interlace_type, NULL, NULL );
+
+        m_size = v2i( w, h );
+
+        png_set_strip_16( png_ptr );
+        if( color_type == PNG_COLOR_TYPE_PALETTE )
+        {
+            png_set_palette_to_rgb( png_ptr );
+        }
+        else if( color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8 )
+        {
+            png_set_expand_gray_1_2_4_to_8( png_ptr );
+        }
+        if( png_get_valid( png_ptr, info_ptr, PNG_INFO_tRNS ) )
+        {
+            png_set_tRNS_to_alpha( png_ptr );
+        }
+        if( color_type == PNG_COLOR_TYPE_GRAY_ALPHA )
+        {
+            png_set_gray_to_rgb(png_ptr);
+        }
+        if( bgr )
+        {
+            png_set_bgr(png_ptr);
+        }
+
+        switch( color_type )
+        {
+        case PNG_COLOR_TYPE_PALETTE:
+            if( !png_get_valid( png_ptr, info_ptr, PNG_INFO_tRNS ) )
+            {
+                png_set_filler( png_ptr, 0xff, PNG_FILLER_AFTER );
+                m_alpha = false;
+            }
+            break;
+        case PNG_COLOR_TYPE_GRAY_ALPHA:
+            png_set_gray_to_rgb( png_ptr );
+            break;
+        case PNG_COLOR_TYPE_RGB:
+            png_set_filler( png_ptr, 0xff, PNG_FILLER_AFTER );
+            m_alpha = false;
+            break;
+        default:
+            break;
+        }
+
+        DBGPRINT( "Bitmap " << fn << "  " << w << "x" << h );
+
+        assert( w % 4 == 0 );
+        assert( h % 4 == 0 );
+
+        m_block = m_data = new uint32_t[w*h];
+        m_linesLeft = h / 4;
+
+        m_load = std::async( std::launch::async, [this, f, png_ptr, info_ptr]() mutable
+        {
+            auto ptr = m_data;
+            unsigned int lines = 0;
+            for( int i=0; i<m_size.y / 4; i++ )
+            {
+                for( int j=0; j<4; j++ )
+                {
+                    png_read_rows( png_ptr, (png_bytepp)&ptr, NULL, 1 );
+                    ptr += m_size.x;
+                }
+                lines++;
+                if( lines >= m_lines )
+                {
+                    lines = 0;
+                    m_sema.unlock();
+                }
+            }
+
+            if( lines != 0 )
+            {
+                m_sema.unlock();
+            }
+
+            png_read_end( png_ptr, info_ptr );
+            png_destroy_read_struct( &png_ptr, &info_ptr, NULL );
+            fclose( f );
+        } );
+    }
+}
+
+Bitmap::Bitmap( const v2i& size )
+    : m_data( new uint32_t[size.x*size.y] )
+    , m_block( nullptr )
+    , m_lines( 1 )
+    , m_linesLeft( size.y / 4 )
+    , m_size( size )
+    , m_sema( 0 )
+{
+}
+
+Bitmap::Bitmap( const Bitmap& src, unsigned int lines )
+    : m_lines( lines )
+    , m_alpha( src.Alpha() )
+    , m_sema( 0 )
+{
+}
+
+Bitmap::~Bitmap()
+{
+    delete[] m_data;
+}
+
+void Bitmap::Write( const char* fn )
+{
+    FILE* f = fopen( fn, "wb" );
+    assert( f );
+
+    png_structp png_ptr = png_create_write_struct( PNG_LIBPNG_VER_STRING, NULL, NULL, NULL );
+    png_infop info_ptr = png_create_info_struct( png_ptr );
+    setjmp( png_jmpbuf( png_ptr ) );
+    png_init_io( png_ptr, f );
+
+    png_set_IHDR( png_ptr, info_ptr, m_size.x, m_size.y, 8, PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE );
+
+    png_write_info( png_ptr, info_ptr );
+
+    uint32_t* ptr = m_data;
+    for( int i=0; i<m_size.y; i++ )
+    {
+        png_write_rows( png_ptr, (png_bytepp)(&ptr), 1 );
+        ptr += m_size.x;
+    }
+
+    png_write_end( png_ptr, info_ptr );
+    png_destroy_write_struct( &png_ptr, &info_ptr );
+
+    fclose( f );
+}
+
+const uint32_t* Bitmap::NextBlock( unsigned int& lines, bool& done )
+{
+    std::lock_guard<std::mutex> lock( m_lock );
+    lines = std::min( m_lines, m_linesLeft );
+    auto ret = m_block;
+    m_sema.lock();
+    m_block += m_size.x * 4 * lines;
+    m_linesLeft -= lines;
+    done = m_linesLeft == 0;
+    return ret;
+}
diff --git a/thirdparty/etcpak/Bitmap.hpp b/thirdparty/etcpak/Bitmap.hpp
new file mode 100644
index 0000000000..fae8c936ed
--- /dev/null
+++ b/thirdparty/etcpak/Bitmap.hpp
@@ -0,0 +1,50 @@
+#ifndef __DARKRL__BITMAP_HPP__
+#define __DARKRL__BITMAP_HPP__
+
+#include <future>
+#include <memory>
+#include <mutex>
+#include <stdint.h>
+
+#include "Semaphore.hpp"
+#include "Vector.hpp"
+
+enum class Channels
+{
+    RGB,
+    Alpha
+};
+
+class Bitmap
+{
+public:
+    Bitmap( const char* fn, unsigned int lines, bool bgr );
+    Bitmap( const v2i& size );
+    virtual ~Bitmap();
+
+    void Write( const char* fn );
+
+    uint32_t* Data() { if( m_load.valid() ) m_load.wait(); return m_data; }
+    const uint32_t* Data() const { if( m_load.valid() ) m_load.wait(); return m_data; }
+    const v2i& Size() const { return m_size; }
+    bool Alpha() const { return m_alpha; }
+
+    const uint32_t* NextBlock( unsigned int& lines, bool& done );
+
+protected:
+    Bitmap( const Bitmap& src, unsigned int lines );
+
+    uint32_t* m_data;
+    uint32_t* m_block;
+    unsigned int m_lines;
+    unsigned int m_linesLeft;
+    v2i m_size;
+    bool m_alpha;
+    Semaphore m_sema;
+    std::mutex m_lock;
+    std::future<void> m_load;
+};
+
+typedef std::shared_ptr<Bitmap> BitmapPtr;
+
+#endif
diff --git a/thirdparty/etcpak/BitmapDownsampled.cpp b/thirdparty/etcpak/BitmapDownsampled.cpp
new file mode 100644
index 0000000000..0eb0d81185
--- /dev/null
+++ b/thirdparty/etcpak/BitmapDownsampled.cpp
@@ -0,0 +1,86 @@
+#include <string.h>
+#include <utility>
+
+#include "BitmapDownsampled.hpp"
+#include "Debug.hpp"
+
+BitmapDownsampled::BitmapDownsampled( const Bitmap& bmp, unsigned int lines )
+    : Bitmap( bmp, lines )
+{
+    m_size.x = std::max( 1, bmp.Size().x / 2 );
+    m_size.y = std::max( 1, bmp.Size().y / 2 );
+
+    int w = std::max( m_size.x, 4 );
+    int h = std::max( m_size.y, 4 );
+
+    DBGPRINT( "Subbitmap " << m_size.x << "x" << m_size.y );
+
+    m_block = m_data = new uint32_t[w*h];
+
+    if( m_size.x < w || m_size.y < h )
+    {
+        memset( m_data, 0, w*h*sizeof( uint32_t ) );
+        m_linesLeft = h / 4;
+        unsigned int lines = 0;
+        for( int i=0; i<h/4; i++ )
+        {
+            for( int j=0; j<4; j++ )
+            {
+                lines++;
+                if( lines > m_lines )
+                {
+                    lines = 0;
+                    m_sema.unlock();
+                }
+            }
+        }
+        if( lines != 0 )
+        {
+            m_sema.unlock();
+        }
+    }
+    else
+    {
+        m_linesLeft = h / 4;
+        m_load = std::async( std::launch::async, [this, &bmp, w, h]() mutable
+        {
+            auto ptr = m_data;
+            auto src1 = bmp.Data();
+            auto src2 = src1 + bmp.Size().x;
+            unsigned int lines = 0;
+            for( int i=0; i<h/4; i++ )
+            {
+                for( int j=0; j<4; j++ )
+                {
+                    for( int k=0; k<m_size.x; k++ )
+                    {
+                        int r = ( ( *src1 & 0x000000FF ) + ( *(src1+1) & 0x000000FF ) + ( *src2 & 0x000000FF ) + ( *(src2+1) & 0x000000FF ) ) / 4;
+                        int g = ( ( ( *src1 & 0x0000FF00 ) + ( *(src1+1) & 0x0000FF00 ) + ( *src2 & 0x0000FF00 ) + ( *(src2+1) & 0x0000FF00 ) ) / 4 ) & 0x0000FF00;
+                        int b = ( ( ( *src1 & 0x00FF0000 ) + ( *(src1+1) & 0x00FF0000 ) + ( *src2 & 0x00FF0000 ) + ( *(src2+1) & 0x00FF0000 ) ) / 4 ) & 0x00FF0000;
+                        int a = ( ( ( ( ( *src1 & 0xFF000000 ) >> 8 ) + ( ( *(src1+1) & 0xFF000000 ) >> 8 ) + ( ( *src2 & 0xFF000000 ) >> 8 ) + ( ( *(src2+1) & 0xFF000000 ) >> 8 ) ) / 4 ) & 0x00FF0000 ) << 8;
+                        *ptr++ = r | g | b | a;
+                        src1 += 2;
+                        src2 += 2;
+                    }
+                    src1 += m_size.x * 2;
+                    src2 += m_size.x * 2;
+                }
+                lines++;
+                if( lines >= m_lines )
+                {
+                    lines = 0;
+                    m_sema.unlock();
+                }
+            }
+
+            if( lines != 0 )
+            {
+                m_sema.unlock();
+            }
+        } );
+    }
+}
+
+BitmapDownsampled::~BitmapDownsampled()
+{
+}
diff --git a/thirdparty/etcpak/BitmapDownsampled.hpp b/thirdparty/etcpak/BitmapDownsampled.hpp
new file mode 100644
index 0000000000..b7313808df
--- /dev/null
+++ b/thirdparty/etcpak/BitmapDownsampled.hpp
@@ -0,0 +1,13 @@
+#ifndef __DARKRL__BITMAPDOWNSAMPLED_HPP__
+#define __DARKRL__BITMAPDOWNSAMPLED_HPP__
+
+#include "Bitmap.hpp"
+
+class BitmapDownsampled : public Bitmap
+{
+public:
+    BitmapDownsampled( const Bitmap& bmp, unsigned int lines );
+    ~BitmapDownsampled();
+};
+
+#endif
diff --git a/thirdparty/etcpak/BlockData.cpp b/thirdparty/etcpak/BlockData.cpp
new file mode 100644
index 0000000000..4906e69492
--- /dev/null
+++ b/thirdparty/etcpak/BlockData.cpp
@@ -0,0 +1,1296 @@
+#include <assert.h>
+#include <string.h>
+
+#include "BlockData.hpp"
+#include "ColorSpace.hpp"
+#include "Debug.hpp"
+#include "MipMap.hpp"
+#include "mmap.hpp"
+#include "ProcessRGB.hpp"
+#include "ProcessDxtc.hpp"
+#include "Tables.hpp"
+#include "TaskDispatch.hpp"
+
+#ifdef __ARM_NEON
+#  include <arm_neon.h>
+#endif
+
+#if defined __SSE4_1__ || defined __AVX2__ || defined _MSC_VER
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#    include <Windows.h>
+#    define _bswap(x) _byteswap_ulong(x)
+#    define _bswap64(x) _byteswap_uint64(x)
+#  else
+#    include <x86intrin.h>
+#  endif
+#endif
+
+#ifndef _bswap
+#  define _bswap(x) __builtin_bswap32(x)
+#  define _bswap64(x) __builtin_bswap64(x)
+#endif
+
+static uint8_t table59T58H[8] = { 3,6,11,16,23,32,41,64 };
+
+BlockData::BlockData( const char* fn )
+    : m_file( fopen( fn, "rb" ) )
+{
+    assert( m_file );
+    fseek( m_file, 0, SEEK_END );
+    m_maplen = ftell( m_file );
+    fseek( m_file, 0, SEEK_SET );
+    m_data = (uint8_t*)mmap( nullptr, m_maplen, PROT_READ, MAP_SHARED, fileno( m_file ), 0 );
+
+    auto data32 = (uint32_t*)m_data;
+    if( *data32 == 0x03525650 )
+    {
+        // PVR
+        switch( *(data32+2) )
+        {
+        case 6:
+            m_type = Etc1;
+            break;
+        case 7:
+            m_type = Dxt1;
+            break;
+        case 11:
+            m_type = Dxt5;
+            break;
+        case 22:
+            m_type = Etc2_RGB;
+            break;
+        case 23:
+            m_type = Etc2_RGBA;
+            break;
+        default:
+            assert( false );
+            break;
+        }
+
+        m_size.y = *(data32+6);
+        m_size.x = *(data32+7);
+        m_dataOffset = 52 + *(data32+12);
+    }
+    else if( *data32 == 0x58544BAB )
+    {
+        // KTX
+        switch( *(data32+7) )
+        {
+        case 0x9274:
+            m_type = Etc2_RGB;
+            break;
+        case 0x9278:
+            m_type = Etc2_RGBA;
+            break;
+        default:
+            assert( false );
+            break;
+        }
+
+        m_size.x = *(data32+9);
+        m_size.y = *(data32+10);
+        m_dataOffset = sizeof( uint32_t ) * 17 + *(data32+15);
+    }
+    else
+    {
+        assert( false );
+    }
+}
+
+static uint8_t* OpenForWriting( const char* fn, size_t len, const v2i& size, FILE** f, int levels, BlockData::Type type )
+{
+    *f = fopen( fn, "wb+" );
+    assert( *f );
+    fseek( *f, len - 1, SEEK_SET );
+    const char zero = 0;
+    fwrite( &zero, 1, 1, *f );
+    fseek( *f, 0, SEEK_SET );
+
+    auto ret = (uint8_t*)mmap( nullptr, len, PROT_WRITE, MAP_SHARED, fileno( *f ), 0 );
+    auto dst = (uint32_t*)ret;
+
+    *dst++ = 0x03525650;  // version
+    *dst++ = 0;           // flags
+    switch( type )        // pixelformat[0]
+    {
+    case BlockData::Etc1:
+        *dst++ = 6;
+        break;
+    case BlockData::Etc2_RGB:
+        *dst++ = 22;
+        break;
+    case BlockData::Etc2_RGBA:
+        *dst++ = 23;
+        break;
+    case BlockData::Dxt1:
+        *dst++ = 7;
+        break;
+    case BlockData::Dxt5:
+        *dst++ = 11;
+        break;
+    default:
+        assert( false );
+        break;
+    }
+    *dst++ = 0;           // pixelformat[1]
+    *dst++ = 0;           // colourspace
+    *dst++ = 0;           // channel type
+    *dst++ = size.y;      // height
+    *dst++ = size.x;      // width
+    *dst++ = 1;           // depth
+    *dst++ = 1;           // num surfs
+    *dst++ = 1;           // num faces
+    *dst++ = levels;      // mipmap count
+    *dst++ = 0;           // metadata size
+
+    return ret;
+}
+
+static int AdjustSizeForMipmaps( const v2i& size, int levels )
+{
+    int len = 0;
+    v2i current = size;
+    for( int i=1; i<levels; i++ )
+    {
+        assert( current.x != 1 || current.y != 1 );
+        current.x = std::max( 1, current.x / 2 );
+        current.y = std::max( 1, current.y / 2 );
+        len += std::max( 4, current.x ) * std::max( 4, current.y ) / 2;
+    }
+    assert( current.x == 1 && current.y == 1 );
+    return len;
+}
+
+BlockData::BlockData( const char* fn, const v2i& size, bool mipmap, Type type )
+    : m_size( size )
+    , m_dataOffset( 52 )
+    , m_maplen( m_size.x*m_size.y/2 )
+    , m_type( type )
+{
+    assert( m_size.x%4 == 0 && m_size.y%4 == 0 );
+
+    uint32_t cnt = m_size.x * m_size.y / 16;
+    DBGPRINT( cnt << " blocks" );
+
+    int levels = 1;
+
+    if( mipmap )
+    {
+        levels = NumberOfMipLevels( size );
+        DBGPRINT( "Number of mipmaps: " << levels );
+        m_maplen += AdjustSizeForMipmaps( size, levels );
+    }
+
+    if( type == Etc2_RGBA || type == Dxt5 ) m_maplen *= 2;
+
+    m_maplen += m_dataOffset;
+    m_data = OpenForWriting( fn, m_maplen, m_size, &m_file, levels, type );
+}
+
+BlockData::BlockData( const v2i& size, bool mipmap, Type type )
+    : m_size( size )
+    , m_dataOffset( 52 )
+    , m_file( nullptr )
+    , m_maplen( m_size.x*m_size.y/2 )
+    , m_type( type )
+{
+    assert( m_size.x%4 == 0 && m_size.y%4 == 0 );
+    if( mipmap )
+    {
+        const int levels = NumberOfMipLevels( size );
+        m_maplen += AdjustSizeForMipmaps( size, levels );
+    }
+
+    if( type == Etc2_RGBA || type == Dxt5 ) m_maplen *= 2;
+
+    m_maplen += m_dataOffset;
+    m_data = new uint8_t[m_maplen];
+}
+
+BlockData::~BlockData()
+{
+    if( m_file )
+    {
+        munmap( m_data, m_maplen );
+        fclose( m_file );
+    }
+    else
+    {
+        delete[] m_data;
+    }
+}
+
+void BlockData::Process( const uint32_t* src, uint32_t blocks, size_t offset, size_t width, Channels type, bool dither )
+{
+    auto dst = ((uint64_t*)( m_data + m_dataOffset )) + offset;
+
+    if( type == Channels::Alpha )
+    {
+        if( m_type != Etc1 )
+        {
+            CompressEtc2Alpha( src, dst, blocks, width );
+        }
+        else
+        {
+            CompressEtc1Alpha( src, dst, blocks, width );
+        }
+    }
+    else
+    {
+        switch( m_type )
+        {
+        case Etc1:
+            if( dither )
+            {
+                CompressEtc1RgbDither( src, dst, blocks, width );
+            }
+            else
+            {
+                CompressEtc1Rgb( src, dst, blocks, width );
+            }
+            break;
+        case Etc2_RGB:
+            CompressEtc2Rgb( src, dst, blocks, width );
+            break;
+        case Dxt1:
+            if( dither )
+            {
+                CompressDxt1Dither( src, dst, blocks, width );
+            }
+            else
+            {
+                CompressDxt1( src, dst, blocks, width );
+            }
+            break;
+        default:
+            assert( false );
+            break;
+        }
+    }
+}
+
+void BlockData::ProcessRGBA( const uint32_t* src, uint32_t blocks, size_t offset, size_t width )
+{
+    auto dst = ((uint64_t*)( m_data + m_dataOffset )) + offset * 2;
+
+    switch( m_type )
+    {
+    case Etc2_RGBA:
+        CompressEtc2Rgba( src, dst, blocks, width );
+        break;
+    case Dxt5:
+        CompressDxt5( src, dst, blocks, width );
+        break;
+    default:
+        assert( false );
+        break;
+    }
+}
+
+namespace
+{
+
+static etcpak_force_inline int32_t expand6(uint32_t value)
+{
+    return (value << 2) | (value >> 4);
+}
+
+static etcpak_force_inline int32_t expand7(uint32_t value)
+{
+    return (value << 1) | (value >> 6);
+}
+
+static etcpak_force_inline void DecodeT( uint64_t block, uint32_t* dst, uint32_t w )
+{
+    const auto r0 = ( block >> 24 ) & 0x1B;
+    const auto rh0 = ( r0 >> 3 ) & 0x3;
+    const auto rl0 = r0 & 0x3;
+    const auto g0 = ( block >> 20 ) & 0xF;
+    const auto b0 = ( block >> 16 ) & 0xF;
+
+    const auto r1 = ( block >> 12 ) & 0xF;
+    const auto g1 = ( block >> 8 ) & 0xF;
+    const auto b1 = ( block >> 4 ) & 0xF;
+
+    const auto cr0 = ( ( rh0 << 6 ) | ( rl0 << 4 ) | ( rh0 << 2 ) | rl0);
+    const auto cg0 = ( g0 << 4 ) | g0;
+    const auto cb0 = ( b0 << 4 ) | b0;
+
+    const auto cr1 = ( r1 << 4 ) | r1;
+    const auto cg1 = ( g1 << 4 ) | g1;
+    const auto cb1 = ( b1 << 4 ) | b1;
+
+    const auto codeword_hi = ( block >> 2 ) & 0x3;
+    const auto codeword_lo = block & 0x1;
+    const auto codeword = ( codeword_hi << 1 ) | codeword_lo;
+
+    const auto c2r = clampu8( cr1 + table59T58H[codeword] );
+    const auto c2g = clampu8( cg1 + table59T58H[codeword] );
+    const auto c2b = clampu8( cb1 + table59T58H[codeword] );
+
+    const auto c3r = clampu8( cr1 - table59T58H[codeword] );
+    const auto c3g = clampu8( cg1 - table59T58H[codeword] );
+    const auto c3b = clampu8( cb1 - table59T58H[codeword] );
+
+    const uint32_t col_tab[4] = {
+        uint32_t(cr0 | ( cg0 << 8 ) | ( cb0 << 16 ) | 0xFF000000),
+        uint32_t(c2r | ( c2g << 8 ) | ( c2b << 16 ) | 0xFF000000),
+        uint32_t(cr1 | ( cg1 << 8 ) | ( cb1 << 16 ) | 0xFF000000),
+        uint32_t(c3r | ( c3g << 8 ) | ( c3b << 16 ) | 0xFF000000)
+    };
+
+    const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;
+    for( uint8_t j = 0; j < 4; j++ )
+    {
+        for( uint8_t i = 0; i < 4; i++ )
+        {
+            //2bit indices distributed on two lane 16bit numbers
+            const uint8_t index = ( ( ( indexes >> ( j + i * 4 + 16 ) ) & 0x1 ) << 1) | ( ( indexes >> ( j + i * 4 ) ) & 0x1);
+            dst[j * w + i] = col_tab[index];
+        }
+    }
+}
+
+static etcpak_force_inline void DecodeTAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
+{
+    const auto r0 = ( block >> 24 ) & 0x1B;
+    const auto rh0 = ( r0 >> 3 ) & 0x3;
+    const auto rl0 = r0 & 0x3;
+    const auto g0 = ( block >> 20 ) & 0xF;
+    const auto b0 = ( block >> 16 ) & 0xF;
+
+    const auto r1 = ( block >> 12 ) & 0xF;
+    const auto g1 = ( block >> 8 ) & 0xF;
+    const auto b1 = ( block >> 4 ) & 0xF;
+
+    const auto cr0 = ( ( rh0 << 6 ) | ( rl0 << 4 ) | ( rh0 << 2 ) | rl0);
+    const auto cg0 = ( g0 << 4 ) | g0;
+    const auto cb0 = ( b0 << 4 ) | b0;
+
+    const auto cr1 = ( r1 << 4 ) | r1;
+    const auto cg1 = ( g1 << 4 ) | g1;
+    const auto cb1 = ( b1 << 4 ) | b1;
+
+    const auto codeword_hi = ( block >> 2 ) & 0x3;
+    const auto codeword_lo = block & 0x1;
+    const auto codeword = (codeword_hi << 1) | codeword_lo;
+
+    const int32_t base = alpha >> 56;
+    const int32_t mul = ( alpha >> 52 ) & 0xF;
+    const auto tbl = g_alpha[( alpha >> 48 ) & 0xF];
+
+    const auto c2r = clampu8( cr1 + table59T58H[codeword] );
+    const auto c2g = clampu8( cg1 + table59T58H[codeword] );
+    const auto c2b = clampu8( cb1 + table59T58H[codeword] );
+
+    const auto c3r = clampu8( cr1 - table59T58H[codeword] );
+    const auto c3g = clampu8( cg1 - table59T58H[codeword] );
+    const auto c3b = clampu8( cb1 - table59T58H[codeword] );
+
+    const uint32_t col_tab[4] = {
+        uint32_t(cr0 | ( cg0 << 8 ) | ( cb0 << 16 )),
+        uint32_t(c2r | ( c2g << 8 ) | ( c2b << 16 )),
+        uint32_t(cr1 | ( cg1 << 8 ) | ( cb1 << 16 )),
+        uint32_t(c3r | ( c3g << 8 ) | ( c3b << 16 ))
+    };
+
+    const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;
+    for( uint8_t j = 0; j < 4; j++ )
+    {
+        for( uint8_t i = 0; i < 4; i++ )
+        {
+            //2bit indices distributed on two lane 16bit numbers
+            const uint8_t index = ( ( ( indexes >> ( j + i * 4 + 16 ) ) & 0x1 ) << 1 ) | ( ( indexes >> ( j + i * 4 ) ) & 0x1 );
+            const auto amod = tbl[( alpha >> ( 45 - j * 3 - i * 12 ) ) & 0x7];
+            const uint32_t a = clampu8( base + amod * mul );
+            dst[j * w + i] = col_tab[index] | ( a << 24 );
+        }
+    }
+}
+
+static etcpak_force_inline void DecodeH( uint64_t block, uint32_t* dst, uint32_t w )
+{
+    const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;
+
+    const auto r0444 = ( block >> 27 ) & 0xF;
+    const auto g0444 = ( ( block >> 20 ) & 0x1 ) | ( ( ( block >> 24 ) & 0x7 ) << 1 );
+    const auto b0444 = ( ( block >> 15 ) & 0x7 ) | ( ( ( block >> 19 ) & 0x1 ) << 3 );
+
+    const auto r1444 = ( block >> 11 ) & 0xF;
+    const auto g1444 = ( block >> 7 ) & 0xF;
+    const auto b1444 = ( block >> 3 ) & 0xF;
+
+    const auto r0 = ( r0444 << 4 ) | r0444;
+    const auto g0 = ( g0444 << 4 ) | g0444;
+    const auto b0 = ( b0444 << 4 ) | b0444;
+
+    const auto r1 = ( r1444 << 4 ) | r1444;
+    const auto g1 = ( g1444 << 4 ) | g1444;
+    const auto b1 = ( b1444 << 4 ) | b1444;
+
+    const auto codeword_hi = ( ( block & 0x1 ) << 1 ) | ( ( block & 0x4 ) );
+    const auto c0 = ( r0444 << 8 ) | ( g0444 << 4 ) | ( b0444 << 0 );
+    const auto c1 = ( block >> 3 ) & ( ( 1 << 12 ) - 1 );
+    const auto codeword_lo = ( c0 >= c1 ) ? 1 : 0;
+    const auto codeword = codeword_hi | codeword_lo;
+
+    const uint32_t col_tab[] = {
+        uint32_t(clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 )),
+        uint32_t(clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 )),
+        uint32_t(clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 )),
+        uint32_t(clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 ))
+    };
+
+    for( uint8_t j = 0; j < 4; j++ )
+    {
+        for( uint8_t i = 0; i < 4; i++ )
+        {
+            const uint8_t index = ( ( ( indexes >> ( j + i * 4 + 16 ) ) & 0x1 ) << 1 ) | ( ( indexes >> ( j + i * 4 ) ) & 0x1 );
+            dst[j * w + i] = col_tab[index] | 0xFF000000;
+        }
+    }
+}
+
+static etcpak_force_inline void DecodeHAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
+{
+    const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;
+
+    const auto r0444 = ( block >> 27 ) & 0xF;
+    const auto g0444 = ( ( block >> 20 ) & 0x1 ) | ( ( ( block >> 24 ) & 0x7 ) << 1 );
+    const auto b0444 = ( ( block >> 15 ) & 0x7 ) | ( ( ( block >> 19 ) & 0x1 ) << 3 );
+
+    const auto r1444 = ( block >> 11 ) & 0xF;
+    const auto g1444 = ( block >> 7 ) & 0xF;
+    const auto b1444 = ( block >> 3 ) & 0xF;
+
+    const auto r0 = ( r0444 << 4 ) | r0444;
+    const auto g0 = ( g0444 << 4 ) | g0444;
+    const auto b0 = ( b0444 << 4 ) | b0444;
+
+    const auto r1 = ( r1444 << 4 ) | r1444;
+    const auto g1 = ( g1444 << 4 ) | g1444;
+    const auto b1 = ( b1444 << 4 ) | b1444;
+
+    const auto codeword_hi = ( ( block & 0x1 ) << 1 ) | ( ( block & 0x4 ) );
+    const auto c0 = ( r0444 << 8 ) | ( g0444 << 4 ) | ( b0444 << 0 );
+    const auto c1 = ( block >> 3 ) & ( ( 1 << 12 ) - 1 );
+    const auto codeword_lo = ( c0 >= c1 ) ? 1 : 0;
+    const auto codeword = codeword_hi | codeword_lo;
+
+    const int32_t base = alpha >> 56;
+    const int32_t mul = ( alpha >> 52 ) & 0xF;
+    const auto tbl = g_alpha[(alpha >> 48) & 0xF];
+
+    const uint32_t col_tab[] = {
+        uint32_t(clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 )),
+        uint32_t(clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 )),
+        uint32_t(clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 )),
+        uint32_t(clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 ))
+    };
+
+    for( uint8_t j = 0; j < 4; j++ )
+    {
+        for( uint8_t i = 0; i < 4; i++ )
+        {
+            const uint8_t index = ( ( ( indexes >> ( j + i * 4 + 16 ) ) & 0x1 ) << 1 ) | ( ( indexes >> ( j + i * 4 ) ) & 0x1 );
+            const auto amod = tbl[( alpha >> ( 45 - j * 3 - i * 12) ) & 0x7];
+            const uint32_t a = clampu8( base + amod * mul );
+            dst[j * w + i] = col_tab[index] | ( a << 24 );
+        }
+    }
+}
+
+static etcpak_force_inline void DecodePlanar( uint64_t block, uint32_t* dst, uint32_t w )
+{
+    const auto bv = expand6((block >> ( 0 + 32)) & 0x3F);
+    const auto gv = expand7((block >> ( 6 + 32)) & 0x7F);
+    const auto rv = expand6((block >> (13 + 32)) & 0x3F);
+
+    const auto bh = expand6((block >> (19 + 32)) & 0x3F);
+    const auto gh = expand7((block >> (25 + 32)) & 0x7F);
+
+    const auto rh0 = (block >> (32 - 32)) & 0x01;
+    const auto rh1 = ((block >> (34 - 32)) & 0x1F) << 1;
+    const auto rh = expand6(rh0 | rh1);
+
+    const auto bo0 = (block >> (39 - 32)) & 0x07;
+    const auto bo1 = ((block >> (43 - 32)) & 0x3) << 3;
+    const auto bo2 = ((block >> (48 - 32)) & 0x1) << 5;
+    const auto bo = expand6(bo0 | bo1 | bo2);
+    const auto go0 = (block >> (49 - 32)) & 0x3F;
+    const auto go1 = ((block >> (56 - 32)) & 0x01) << 6;
+    const auto go = expand7(go0 | go1);
+    const auto ro = expand6((block >> (57 - 32)) & 0x3F);
+
+#ifdef __ARM_NEON
+    uint64_t init = uint64_t(uint16_t(rh-ro)) | ( uint64_t(uint16_t(gh-go)) << 16 ) | ( uint64_t(uint16_t(bh-bo)) << 32 );
+    int16x8_t chco = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
+    init = uint64_t(uint16_t( (rv-ro) - 4 * (rh-ro) )) | ( uint64_t(uint16_t( (gv-go) - 4 * (gh-go) )) << 16 ) | ( uint64_t(uint16_t( (bv-bo) - 4 * (bh-bo) )) << 32 );
+    int16x8_t cvco = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
+    init = uint64_t(4*ro+2) | ( uint64_t(4*go+2) << 16 ) | ( uint64_t(4*bo+2) << 32 ) | ( uint64_t(0xFFF) << 48 );
+    int16x8_t col = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
+
+    for( int j=0; j<4; j++ )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            uint8x8_t c = vqshrun_n_s16( col, 2 );
+            vst1_lane_u32( dst+j*w+i, vreinterpret_u32_u8( c ), 0 );
+            col = vaddq_s16( col, chco );
+        }
+        col = vaddq_s16( col, cvco );
+    }
+#elif defined __AVX2__
+    const auto R0 = 4*ro+2;
+    const auto G0 = 4*go+2;
+    const auto B0 = 4*bo+2;
+    const auto RHO = rh-ro;
+    const auto GHO = gh-go;
+    const auto BHO = bh-bo;
+
+    __m256i cvco = _mm256_setr_epi16( rv - ro, gv - go, bv - bo, 0, rv - ro, gv - go, bv - bo, 0, rv - ro, gv - go, bv - bo, 0, rv - ro, gv - go, bv - bo, 0 );
+    __m256i col = _mm256_setr_epi16( R0, G0, B0, 0xFFF, R0+RHO, G0+GHO, B0+BHO, 0xFFF, R0+2*RHO, G0+2*GHO, B0+2*BHO, 0xFFF, R0+3*RHO, G0+3*GHO, B0+3*BHO, 0xFFF );
+
+    for( int j=0; j<4; j++ )
+    {
+        __m256i c = _mm256_srai_epi16( col, 2 );
+        __m128i s = _mm_packus_epi16( _mm256_castsi256_si128( c ), _mm256_extracti128_si256( c, 1 ) );
+        _mm_storeu_si128( (__m128i*)(dst+j*w), s );
+        col = _mm256_add_epi16( col, cvco );
+    }
+#elif defined __SSE4_1__
+    __m128i chco = _mm_setr_epi16( rh - ro, gh - go, bh - bo, 0, 0, 0, 0, 0 );
+    __m128i cvco = _mm_setr_epi16( (rv - ro) - 4 * (rh - ro), (gv - go) - 4 * (gh - go), (bv - bo) - 4 * (bh - bo), 0, 0, 0, 0, 0 );
+    __m128i col = _mm_setr_epi16( 4*ro+2, 4*go+2, 4*bo+2, 0xFFF, 0, 0, 0, 0 );
+
+    for( int j=0; j<4; j++ )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            __m128i c = _mm_srai_epi16( col, 2 );
+            __m128i s = _mm_packus_epi16( c, c );
+            dst[j*w+i] = _mm_cvtsi128_si32( s );
+            col = _mm_add_epi16( col, chco );
+        }
+        col = _mm_add_epi16( col, cvco );
+    }
+#else
+    for( int j=0; j<4; j++ )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            const uint32_t r = (i * (rh - ro) + j * (rv - ro) + 4 * ro + 2) >> 2;
+            const uint32_t g = (i * (gh - go) + j * (gv - go) + 4 * go + 2) >> 2;
+            const uint32_t b = (i * (bh - bo) + j * (bv - bo) + 4 * bo + 2) >> 2;
+            if( ( ( r | g | b ) & ~0xFF ) == 0 )
+            {
+                dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
+            }
+            else
+            {
+                const auto rc = clampu8( r );
+                const auto gc = clampu8( g );
+                const auto bc = clampu8( b );
+                dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
+            }
+        }
+    }
+#endif
+}
+
+static etcpak_force_inline void DecodePlanarAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
+{
+    const auto bv = expand6((block >> ( 0 + 32)) & 0x3F);
+    const auto gv = expand7((block >> ( 6 + 32)) & 0x7F);
+    const auto rv = expand6((block >> (13 + 32)) & 0x3F);
+
+    const auto bh = expand6((block >> (19 + 32)) & 0x3F);
+    const auto gh = expand7((block >> (25 + 32)) & 0x7F);
+
+    const auto rh0 = (block >> (32 - 32)) & 0x01;
+    const auto rh1 = ((block >> (34 - 32)) & 0x1F) << 1;
+    const auto rh = expand6(rh0 | rh1);
+
+    const auto bo0 = (block >> (39 - 32)) & 0x07;
+    const auto bo1 = ((block >> (43 - 32)) & 0x3) << 3;
+    const auto bo2 = ((block >> (48 - 32)) & 0x1) << 5;
+    const auto bo = expand6(bo0 | bo1 | bo2);
+    const auto go0 = (block >> (49 - 32)) & 0x3F;
+    const auto go1 = ((block >> (56 - 32)) & 0x01) << 6;
+    const auto go = expand7(go0 | go1);
+    const auto ro = expand6((block >> (57 - 32)) & 0x3F);
+
+    const int32_t base = alpha >> 56;
+    const int32_t mul = ( alpha >> 52 ) & 0xF;
+    const auto tbl = g_alpha[( alpha >> 48 ) & 0xF];
+
+#ifdef __ARM_NEON
+    uint64_t init = uint64_t(uint16_t(rh-ro)) | ( uint64_t(uint16_t(gh-go)) << 16 ) | ( uint64_t(uint16_t(bh-bo)) << 32 );
+    int16x8_t chco = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
+    init = uint64_t(uint16_t( (rv-ro) - 4 * (rh-ro) )) | ( uint64_t(uint16_t( (gv-go) - 4 * (gh-go) )) << 16 ) | ( uint64_t(uint16_t( (bv-bo) - 4 * (bh-bo) )) << 32 );
+    int16x8_t cvco = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
+    init = uint64_t(4*ro+2) | ( uint64_t(4*go+2) << 16 ) | ( uint64_t(4*bo+2) << 32 );
+    int16x8_t col = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
+
+    for( int j=0; j<4; j++ )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            const auto amod = tbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
+            const uint32_t a = clampu8( base + amod * mul );
+            uint8x8_t c = vqshrun_n_s16( col, 2 );
+            dst[j*w+i] = vget_lane_u32( vreinterpret_u32_u8( c ), 0 ) | ( a << 24 );
+            col = vaddq_s16( col, chco );
+        }
+        col = vaddq_s16( col, cvco );
+    }
+#elif defined __SSE4_1__
+    __m128i chco = _mm_setr_epi16( rh - ro, gh - go, bh - bo, 0, 0, 0, 0, 0 );
+    __m128i cvco = _mm_setr_epi16( (rv - ro) - 4 * (rh - ro), (gv - go) - 4 * (gh - go), (bv - bo) - 4 * (bh - bo), 0, 0, 0, 0, 0 );
+    __m128i col = _mm_setr_epi16( 4*ro+2, 4*go+2, 4*bo+2, 0, 0, 0, 0, 0 );
+
+    for( int j=0; j<4; j++ )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            const auto amod = tbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
+            const uint32_t a = clampu8( base + amod * mul );
+            __m128i c = _mm_srai_epi16( col, 2 );
+            __m128i s = _mm_packus_epi16( c, c );
+            dst[j*w+i] = _mm_cvtsi128_si32( s ) | ( a << 24 );
+            col = _mm_add_epi16( col, chco );
+        }
+        col = _mm_add_epi16( col, cvco );
+    }
+#else
+    for (auto j = 0; j < 4; j++)
+    {
+        for (auto i = 0; i < 4; i++)
+        {
+            const uint32_t r = (i * (rh - ro) + j * (rv - ro) + 4 * ro + 2) >> 2;
+            const uint32_t g = (i * (gh - go) + j * (gv - go) + 4 * go + 2) >> 2;
+            const uint32_t b = (i * (bh - bo) + j * (bv - bo) + 4 * bo + 2) >> 2;
+            const auto amod = tbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
+            const uint32_t a = clampu8( base + amod * mul );
+            if( ( ( r | g | b ) & ~0xFF ) == 0 )
+            {
+                dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
+            }
+            else
+            {
+                const auto rc = clampu8( r );
+                const auto gc = clampu8( g );
+                const auto bc = clampu8( b );
+                dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
+            }
+        }
+    }
+#endif
+}
+
+}
+
+BitmapPtr BlockData::Decode()
+{
+    switch( m_type )
+    {
+    case Etc1:
+    case Etc2_RGB:
+        return DecodeRGB();
+    case Etc2_RGBA:
+        return DecodeRGBA();
+    case Dxt1:
+        return DecodeDxt1();
+    case Dxt5:
+        return DecodeDxt5();
+    default:
+        assert( false );
+        return nullptr;
+    }
+}
+
+static etcpak_force_inline uint64_t ConvertByteOrder( uint64_t d )
+{
+    uint32_t word[2];
+    memcpy( word, &d, 8 );
+    word[0] = _bswap( word[0] );
+    word[1] = _bswap( word[1] );
+    memcpy( &d, word, 8 );
+    return d;
+}
+
+static etcpak_force_inline void DecodeRGBPart( uint64_t d, uint32_t* dst, uint32_t w )
+{
+    d = ConvertByteOrder( d );
+
+    uint32_t br[2], bg[2], bb[2];
+
+    if( d & 0x2 )
+    {
+        int32_t dr, dg, db;
+
+        uint32_t r0 = ( d & 0xF8000000 ) >> 27;
+        uint32_t g0 = ( d & 0x00F80000 ) >> 19;
+        uint32_t b0 = ( d & 0x0000F800 ) >> 11;
+
+        dr = ( int32_t(d) << 5 ) >> 29;
+        dg = ( int32_t(d) << 13 ) >> 29;
+        db = ( int32_t(d) << 21 ) >> 29;
+
+        int32_t r1 = int32_t(r0) + dr;
+        int32_t g1 = int32_t(g0) + dg;
+        int32_t b1 = int32_t(b0) + db;
+
+        // T mode
+        if ( (r1 < 0) || (r1 > 31) )
+        {
+            DecodeT( d, dst, w );
+            return;
+        }
+
+        // H mode
+        if ((g1 < 0) || (g1 > 31))
+        {
+            DecodeH( d, dst, w );
+            return;
+        }
+
+        // P mode
+        if( (b1 < 0) || (b1 > 31) )
+        {
+            DecodePlanar( d, dst, w );
+            return;
+        }
+
+        br[0] = ( r0 << 3 ) | ( r0 >> 2 );
+        br[1] = ( r1 << 3 ) | ( r1 >> 2 );
+        bg[0] = ( g0 << 3 ) | ( g0 >> 2 );
+        bg[1] = ( g1 << 3 ) | ( g1 >> 2 );
+        bb[0] = ( b0 << 3 ) | ( b0 >> 2 );
+        bb[1] = ( b1 << 3 ) | ( b1 >> 2 );
+    }
+    else
+    {
+        br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 );
+        br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 );
+        bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 );
+        bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 );
+        bb[0] = ( ( d & 0x0000F000 ) >> 8  ) | ( ( d & 0x0000F000 ) >> 12 );
+        bb[1] = ( ( d & 0x00000F00 ) >> 4  ) | ( ( d & 0x00000F00 ) >> 8  );
+    }
+
+    unsigned int tcw[2];
+    tcw[0] = ( d & 0xE0 ) >> 5;
+    tcw[1] = ( d & 0x1C ) >> 2;
+
+    uint32_t b1 = ( d >> 32 ) & 0xFFFF;
+    uint32_t b2 = ( d >> 48 );
+
+    b1 = ( b1 | ( b1 << 8 ) ) & 0x00FF00FF;
+    b1 = ( b1 | ( b1 << 4 ) ) & 0x0F0F0F0F;
+    b1 = ( b1 | ( b1 << 2 ) ) & 0x33333333;
+    b1 = ( b1 | ( b1 << 1 ) ) & 0x55555555;
+
+    b2 = ( b2 | ( b2 << 8 ) ) & 0x00FF00FF;
+    b2 = ( b2 | ( b2 << 4 ) ) & 0x0F0F0F0F;
+    b2 = ( b2 | ( b2 << 2 ) ) & 0x33333333;
+    b2 = ( b2 | ( b2 << 1 ) ) & 0x55555555;
+
+    uint32_t idx = b1 | ( b2 << 1 );
+
+    if( d & 0x1 )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            for( int j=0; j<4; j++ )
+            {
+                const auto mod = g_table[tcw[j/2]][idx & 0x3];
+                const auto r = br[j/2] + mod;
+                const auto g = bg[j/2] + mod;
+                const auto b = bb[j/2] + mod;
+                if( ( ( r | g | b ) & ~0xFF ) == 0 )
+                {
+                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
+                }
+                else
+                {
+                    const auto rc = clampu8( r );
+                    const auto gc = clampu8( g );
+                    const auto bc = clampu8( b );
+                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
+                }
+                idx >>= 2;
+            }
+        }
+    }
+    else
+    {
+        for( int i=0; i<4; i++ )
+        {
+            const auto tbl = g_table[tcw[i/2]];
+            const auto cr = br[i/2];
+            const auto cg = bg[i/2];
+            const auto cb = bb[i/2];
+
+            for( int j=0; j<4; j++ )
+            {
+                const auto mod = tbl[idx & 0x3];
+                const auto r = cr + mod;
+                const auto g = cg + mod;
+                const auto b = cb + mod;
+                if( ( ( r | g | b ) & ~0xFF ) == 0 )
+                {
+                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
+                }
+                else
+                {
+                    const auto rc = clampu8( r );
+                    const auto gc = clampu8( g );
+                    const auto bc = clampu8( b );
+                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
+                }
+                idx >>= 2;
+            }
+        }
+    }
+}
+
+static etcpak_force_inline void DecodeRGBAPart( uint64_t d, uint64_t alpha, uint32_t* dst, uint32_t w )
+{
+    d = ConvertByteOrder( d );
+    alpha = _bswap64( alpha );
+
+    uint32_t br[2], bg[2], bb[2];
+
+    if( d & 0x2 )
+    {
+        int32_t dr, dg, db;
+
+        uint32_t r0 = ( d & 0xF8000000 ) >> 27;
+        uint32_t g0 = ( d & 0x00F80000 ) >> 19;
+        uint32_t b0 = ( d & 0x0000F800 ) >> 11;
+
+        dr = ( int32_t(d) << 5 ) >> 29;
+        dg = ( int32_t(d) << 13 ) >> 29;
+        db = ( int32_t(d) << 21 ) >> 29;
+
+        int32_t r1 = int32_t(r0) + dr;
+        int32_t g1 = int32_t(g0) + dg;
+        int32_t b1 = int32_t(b0) + db;
+
+        // T mode
+        if ( (r1 < 0) || (r1 > 31) )
+        {
+            DecodeTAlpha( d, alpha, dst, w );
+            return;
+        }
+
+        // H mode
+        if ( (g1 < 0) || (g1 > 31) )
+        {
+            DecodeHAlpha( d, alpha, dst, w );
+            return;
+        }
+
+        // P mode
+        if ( (b1 < 0) || (b1 > 31) )
+        {
+            DecodePlanarAlpha( d, alpha, dst, w );
+            return;
+        }
+
+        br[0] = ( r0 << 3 ) | ( r0 >> 2 );
+        br[1] = ( r1 << 3 ) | ( r1 >> 2 );
+        bg[0] = ( g0 << 3 ) | ( g0 >> 2 );
+        bg[1] = ( g1 << 3 ) | ( g1 >> 2 );
+        bb[0] = ( b0 << 3 ) | ( b0 >> 2 );
+        bb[1] = ( b1 << 3 ) | ( b1 >> 2 );
+    }
+    else
+    {
+        br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 );
+        br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 );
+        bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 );
+        bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 );
+        bb[0] = ( ( d & 0x0000F000 ) >> 8  ) | ( ( d & 0x0000F000 ) >> 12 );
+        bb[1] = ( ( d & 0x00000F00 ) >> 4  ) | ( ( d & 0x00000F00 ) >> 8  );
+    }
+
+    unsigned int tcw[2];
+    tcw[0] = ( d & 0xE0 ) >> 5;
+    tcw[1] = ( d & 0x1C ) >> 2;
+
+    uint32_t b1 = ( d >> 32 ) & 0xFFFF;
+    uint32_t b2 = ( d >> 48 );
+
+    b1 = ( b1 | ( b1 << 8 ) ) & 0x00FF00FF;
+    b1 = ( b1 | ( b1 << 4 ) ) & 0x0F0F0F0F;
+    b1 = ( b1 | ( b1 << 2 ) ) & 0x33333333;
+    b1 = ( b1 | ( b1 << 1 ) ) & 0x55555555;
+
+    b2 = ( b2 | ( b2 << 8 ) ) & 0x00FF00FF;
+    b2 = ( b2 | ( b2 << 4 ) ) & 0x0F0F0F0F;
+    b2 = ( b2 | ( b2 << 2 ) ) & 0x33333333;
+    b2 = ( b2 | ( b2 << 1 ) ) & 0x55555555;
+
+    uint32_t idx = b1 | ( b2 << 1 );
+
+    const int32_t base = alpha >> 56;
+    const int32_t mul = ( alpha >> 52 ) & 0xF;
+    const auto atbl = g_alpha[( alpha >> 48 ) & 0xF];
+
+    if( d & 0x1 )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            for( int j=0; j<4; j++ )
+            {
+                const auto mod = g_table[tcw[j/2]][idx & 0x3];
+                const auto r = br[j/2] + mod;
+                const auto g = bg[j/2] + mod;
+                const auto b = bb[j/2] + mod;
+                const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
+                const uint32_t a = clampu8( base + amod * mul );
+                if( ( ( r | g | b ) & ~0xFF ) == 0 )
+                {
+                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
+                }
+                else
+                {
+                    const auto rc = clampu8( r );
+                    const auto gc = clampu8( g );
+                    const auto bc = clampu8( b );
+                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
+                }
+                idx >>= 2;
+            }
+        }
+    }
+    else
+    {
+        for( int i=0; i<4; i++ )
+        {
+            const auto tbl = g_table[tcw[i/2]];
+            const auto cr = br[i/2];
+            const auto cg = bg[i/2];
+            const auto cb = bb[i/2];
+
+            for( int j=0; j<4; j++ )
+            {
+                const auto mod = tbl[idx & 0x3];
+                const auto r = cr + mod;
+                const auto g = cg + mod;
+                const auto b = cb + mod;
+                const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
+                const uint32_t a = clampu8( base + amod * mul );
+                if( ( ( r | g | b ) & ~0xFF ) == 0 )
+                {
+                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
+                }
+                else
+                {
+                    const auto rc = clampu8( r );
+                    const auto gc = clampu8( g );
+                    const auto bc = clampu8( b );
+                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
+                }
+                idx >>= 2;
+            }
+        }
+    }
+}
+
+BitmapPtr BlockData::DecodeRGB()
+{
+    auto ret = std::make_shared<Bitmap>( m_size );
+
+    const uint64_t* src = (const uint64_t*)( m_data + m_dataOffset );
+    uint32_t* dst = ret->Data();
+
+    for( int y=0; y<m_size.y/4; y++ )
+    {
+        for( int x=0; x<m_size.x/4; x++ )
+        {
+            uint64_t d = *src++;
+            DecodeRGBPart( d, dst, m_size.x );
+            dst += 4;
+        }
+        dst += m_size.x*3;
+    }
+
+    return ret;
+}
+
+BitmapPtr BlockData::DecodeRGBA()
+{
+    auto ret = std::make_shared<Bitmap>( m_size );
+
+    const uint64_t* src = (const uint64_t*)( m_data + m_dataOffset );
+    uint32_t* dst = ret->Data();
+
+    for( int y=0; y<m_size.y/4; y++ )
+    {
+        for( int x=0; x<m_size.x/4; x++ )
+        {
+            uint64_t a = *src++;
+            uint64_t d = *src++;
+            DecodeRGBAPart( d, a, dst, m_size.x );
+            dst += 4;
+        }
+        dst += m_size.x*3;
+    }
+
+    return ret;
+}
+
+static etcpak_force_inline void DecodeDxt1Part( uint64_t d, uint32_t* dst, uint32_t w )
+{
+    uint8_t* in = (uint8_t*)&d;
+    uint16_t c0, c1;
+    uint32_t idx;
+    memcpy( &c0, in, 2 );
+    memcpy( &c1, in+2, 2 );
+    memcpy( &idx, in+4, 4 );
+
+    uint8_t r0 = ( ( c0 & 0xF800 ) >> 8 ) | ( ( c0 & 0xF800 ) >> 13 );
+    uint8_t g0 = ( ( c0 & 0x07E0 ) >> 3 ) | ( ( c0 & 0x07E0 ) >> 9 );
+    uint8_t b0 = ( ( c0 & 0x001F ) << 3 ) | ( ( c0 & 0x001F ) >> 2 );
+
+    uint8_t r1 = ( ( c1 & 0xF800 ) >> 8 ) | ( ( c1 & 0xF800 ) >> 13 );
+    uint8_t g1 = ( ( c1 & 0x07E0 ) >> 3 ) | ( ( c1 & 0x07E0 ) >> 9 );
+    uint8_t b1 = ( ( c1 & 0x001F ) << 3 ) | ( ( c1 & 0x001F ) >> 2 );
+
+    uint32_t dict[4];
+
+    dict[0] = 0xFF000000 | ( b0 << 16 ) | ( g0 << 8 ) | r0;
+    dict[1] = 0xFF000000 | ( b1 << 16 ) | ( g1 << 8 ) | r1;
+
+    uint32_t r, g, b;
+    if( c0 > c1 )
+    {
+        r = (2*r0+r1)/3;
+        g = (2*g0+g1)/3;
+        b = (2*b0+b1)/3;
+        dict[2] = 0xFF000000 | ( b << 16 ) | ( g << 8 ) | r;
+        r = (2*r1+r0)/3;
+        g = (2*g1+g0)/3;
+        b = (2*b1+b0)/3;
+        dict[3] = 0xFF000000 | ( b << 16 ) | ( g << 8 ) | r;
+    }
+    else
+    {
+        r = (int(r0)+r1)/2;
+        g = (int(g0)+g1)/2;
+        b = (int(b0)+b1)/2;
+        dict[2] = 0xFF000000 | ( b << 16 ) | ( g << 8 ) | r;
+        dict[3] = 0xFF000000;
+    }
+
+    memcpy( dst+0, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+1, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+2, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+3, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    dst += w;
+
+    memcpy( dst+0, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+1, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+2, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+3, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    dst += w;
+
+    memcpy( dst+0, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+1, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+2, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+3, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    dst += w;
+
+    memcpy( dst+0, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+1, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+2, dict + (idx & 0x3), 4 );
+    idx >>= 2;
+    memcpy( dst+3, dict + (idx & 0x3), 4 );
+}
+
+static etcpak_force_inline void DecodeDxt5Part( uint64_t a, uint64_t d, uint32_t* dst, uint32_t w )
+{
+    uint8_t* ain = (uint8_t*)&a;
+    uint8_t a0, a1;
+    uint64_t aidx = 0;
+    memcpy( &a0, ain, 1 );
+    memcpy( &a1, ain+1, 1 );
+    memcpy( &aidx, ain+2, 6 );
+
+    uint8_t* in = (uint8_t*)&d;
+    uint16_t c0, c1;
+    uint32_t idx;
+    memcpy( &c0, in, 2 );
+    memcpy( &c1, in+2, 2 );
+    memcpy( &idx, in+4, 4 );
+
+    uint32_t adict[8];
+    adict[0] = a0 << 24;
+    adict[1] = a1 << 24;
+    if( a0 > a1 )
+    {
+        adict[2] = ( (6*a0+1*a1)/7 ) << 24;
+        adict[3] = ( (5*a0+2*a1)/7 ) << 24;
+        adict[4] = ( (4*a0+3*a1)/7 ) << 24;
+        adict[5] = ( (3*a0+4*a1)/7 ) << 24;
+        adict[6] = ( (2*a0+5*a1)/7 ) << 24;
+        adict[7] = ( (1*a0+6*a1)/7 ) << 24;
+    }
+    else
+    {
+        adict[2] = ( (4*a0+1*a1)/5 ) << 24;
+        adict[3] = ( (3*a0+2*a1)/5 ) << 24;
+        adict[4] = ( (2*a0+3*a1)/5 ) << 24;
+        adict[5] = ( (1*a0+4*a1)/5 ) << 24;
+        adict[6] = 0;
+        adict[7] = 0xFF000000;
+    }
+
+    uint8_t r0 = ( ( c0 & 0xF800 ) >> 8 ) | ( ( c0 & 0xF800 ) >> 13 );
+    uint8_t g0 = ( ( c0 & 0x07E0 ) >> 3 ) | ( ( c0 & 0x07E0 ) >> 9 );
+    uint8_t b0 = ( ( c0 & 0x001F ) << 3 ) | ( ( c0 & 0x001F ) >> 2 );
+
+    uint8_t r1 = ( ( c1 & 0xF800 ) >> 8 ) | ( ( c1 & 0xF800 ) >> 13 );
+    uint8_t g1 = ( ( c1 & 0x07E0 ) >> 3 ) | ( ( c1 & 0x07E0 ) >> 9 );
+    uint8_t b1 = ( ( c1 & 0x001F ) << 3 ) | ( ( c1 & 0x001F ) >> 2 );
+
+    uint32_t dict[4];
+
+    dict[0] = ( b0 << 16 ) | ( g0 << 8 ) | r0;
+    dict[1] = ( b1 << 16 ) | ( g1 << 8 ) | r1;
+
+    uint32_t r, g, b;
+    if( c0 > c1 )
+    {
+        r = (2*r0+r1)/3;
+        g = (2*g0+g1)/3;
+        b = (2*b0+b1)/3;
+        dict[2] = ( b << 16 ) | ( g << 8 ) | r;
+        r = (2*r1+r0)/3;
+        g = (2*g1+g0)/3;
+        b = (2*b1+b0)/3;
+        dict[3] = ( b << 16 ) | ( g << 8 ) | r;
+    }
+    else
+    {
+        r = (int(r0)+r1)/2;
+        g = (int(g0)+g1)/2;
+        b = (int(b0)+b1)/2;
+        dict[2] = ( b << 16 ) | ( g << 8 ) | r;
+        dict[3] = 0;
+    }
+
+    dst[0] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[1] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[2] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[3] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst += w;
+
+    dst[0] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[1] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[2] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[3] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst += w;
+
+    dst[0] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[1] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[2] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[3] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst += w;
+
+    dst[0] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[1] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[2] = dict[idx & 0x3] | adict[aidx & 0x7];
+    idx >>= 2;
+    aidx >>= 3;
+    dst[3] = dict[idx & 0x3] | adict[aidx & 0x7];
+}
+
+BitmapPtr BlockData::DecodeDxt1()
+{
+    auto ret = std::make_shared<Bitmap>( m_size );
+
+    const uint64_t* src = (const uint64_t*)( m_data + m_dataOffset );
+    uint32_t* dst = ret->Data();
+
+    for( int y=0; y<m_size.y/4; y++ )
+    {
+        for( int x=0; x<m_size.x/4; x++ )
+        {
+            uint64_t d = *src++;
+            DecodeDxt1Part( d, dst, m_size.x );
+            dst += 4;
+        }
+        dst += m_size.x*3;
+    }
+
+    return ret;
+}
+
+BitmapPtr BlockData::DecodeDxt5()
+{
+    auto ret = std::make_shared<Bitmap>( m_size );
+
+    const uint64_t* src = (const uint64_t*)( m_data + m_dataOffset );
+    uint32_t* dst = ret->Data();
+
+    for( int y=0; y<m_size.y/4; y++ )
+    {
+        for( int x=0; x<m_size.x/4; x++ )
+        {
+            uint64_t a = *src++;
+            uint64_t d = *src++;
+            DecodeDxt5Part( a, d, dst, m_size.x );
+            dst += 4;
+        }
+        dst += m_size.x*3;
+    }
+
+    return ret;
+}
diff --git a/thirdparty/etcpak/BlockData.hpp b/thirdparty/etcpak/BlockData.hpp
new file mode 100644
index 0000000000..209e35b4e6
--- /dev/null
+++ b/thirdparty/etcpak/BlockData.hpp
@@ -0,0 +1,56 @@
+#ifndef __BLOCKDATA_HPP__
+#define __BLOCKDATA_HPP__
+
+#include <condition_variable>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <stdint.h>
+#include <stdio.h>
+#include <vector>
+
+#include "Bitmap.hpp"
+#include "ForceInline.hpp"
+#include "Vector.hpp"
+
+class BlockData
+{
+public:
+    enum Type
+    {
+        Etc1,
+        Etc2_RGB,
+        Etc2_RGBA,
+        Dxt1,
+        Dxt5
+    };
+
+    BlockData( const char* fn );
+    BlockData( const char* fn, const v2i& size, bool mipmap, Type type );
+    BlockData( const v2i& size, bool mipmap, Type type );
+    ~BlockData();
+
+    BitmapPtr Decode();
+
+    void Process( const uint32_t* src, uint32_t blocks, size_t offset, size_t width, Channels type, bool dither );
+    void ProcessRGBA( const uint32_t* src, uint32_t blocks, size_t offset, size_t width );
+
+    const v2i& Size() const { return m_size; }
+
+private:
+    etcpak_no_inline BitmapPtr DecodeRGB();
+    etcpak_no_inline BitmapPtr DecodeRGBA();
+    etcpak_no_inline BitmapPtr DecodeDxt1();
+    etcpak_no_inline BitmapPtr DecodeDxt5();
+
+    uint8_t* m_data;
+    v2i m_size;
+    size_t m_dataOffset;
+    FILE* m_file;
+    size_t m_maplen;
+    Type m_type;
+};
+
+typedef std::shared_ptr<BlockData> BlockDataPtr;
+
+#endif
diff --git a/thirdparty/etcpak/ColorSpace.cpp b/thirdparty/etcpak/ColorSpace.cpp
new file mode 100644
index 0000000000..0411541066
--- /dev/null
+++ b/thirdparty/etcpak/ColorSpace.cpp
@@ -0,0 +1,114 @@
+#include <math.h>
+#include <stdint.h>
+
+#include "Math.hpp"
+#include "ColorSpace.hpp"
+
+namespace Color
+{
+
+    static const XYZ white( v3b( 255, 255, 255 ) );
+    static const v3f rwhite( 1.f / white.x, 1.f / white.y, 1.f / white.z );
+
+
+    XYZ::XYZ( float _x, float _y, float _z )
+        : x( _x )
+        , y( _y )
+        , z( _z )
+    {
+    }
+
+    XYZ::XYZ( const v3b& rgb )
+    {
+        const float r = rgb.x / 255.f;
+        const float g = rgb.y / 255.f;
+        const float b = rgb.z / 255.f;
+
+        const float rl = sRGB2linear( r );
+        const float gl = sRGB2linear( g );
+        const float bl = sRGB2linear( b );
+
+        x = 0.4124f * rl + 0.3576f * gl + 0.1805f * bl;
+        y = 0.2126f * rl + 0.7152f * gl + 0.0722f * bl;
+        z = 0.0193f * rl + 0.1192f * gl + 0.9505f * bl;
+    }
+
+    static float revlab( float t )
+    {
+        const float p1 = 6.f/29.f;
+        const float p2 = 4.f/29.f;
+
+        if( t > p1 )
+        {
+            return t*t*t;
+        }
+        else
+        {
+            return 3 * sq( p1 ) * ( t - p2 );
+        }
+    }
+
+    XYZ::XYZ( const Lab& lab )
+    {
+        y = white.y * revlab( 1.f/116.f * ( lab.L + 16 ) );
+        x = white.x * revlab( 1.f/116.f * ( lab.L + 16 ) + 1.f/500.f * lab.a );
+        z = white.z * revlab( 1.f/116.f * ( lab.L + 16 ) - 1.f/200.f * lab.b );
+    }
+
+    v3i XYZ::RGB() const
+    {
+        const float rl =  3.2406f * x - 1.5372f * y - 0.4986f * z;
+        const float gl = -0.9689f * x + 1.8758f * y + 0.0415f * z;
+        const float bl =  0.0557f * x - 0.2040f * y + 1.0570f * z;
+
+        const float r = linear2sRGB( rl );
+        const float g = linear2sRGB( gl );
+        const float b = linear2sRGB( bl );
+
+        return v3i( clampu8( int32_t( r * 255 ) ), clampu8( int32_t( g * 255 ) ), clampu8( int32_t( b * 255 ) ) );
+    }
+
+
+    Lab::Lab()
+        : L( 0 )
+        , a( 0 )
+        , b( 0 )
+    {
+    }
+
+    Lab::Lab( float L, float a, float b )
+        : L( L )
+        , a( a )
+        , b( b )
+    {
+    }
+
+    static float labfunc( float t )
+    {
+        const float p1 = (6.f/29.f)*(6.f/29.f)*(6.f/29.f);
+        const float p2 = (1.f/3.f)*(29.f/6.f)*(29.f/6.f);
+        const float p3 = (4.f/29.f);
+
+        if( t > p1 )
+        {
+            return pow( t, 1.f/3.f );
+        }
+        else
+        {
+            return p2 * t + p3;
+        }
+    }
+
+    Lab::Lab( const XYZ& xyz )
+    {
+        L = 116 * labfunc( xyz.y * rwhite.y ) - 16;
+        a = 500 * ( labfunc( xyz.x * rwhite.x ) - labfunc( xyz.y * rwhite.y ) );
+        b = 200 * ( labfunc( xyz.y * rwhite.y ) - labfunc( xyz.z * rwhite.z ) );
+    }
+
+    Lab::Lab( const v3b& rgb )
+    {
+        new(this) Lab( XYZ( rgb ) );
+    }
+
+}
diff --git a/thirdparty/etcpak/ColorSpace.hpp b/thirdparty/etcpak/ColorSpace.hpp
new file mode 100644
index 0000000000..c9d0a9cf3f
--- /dev/null
+++ b/thirdparty/etcpak/ColorSpace.hpp
@@ -0,0 +1,36 @@
+#ifndef __DARKRL__COLORSPACE_HPP__
+#define __DARKRL__COLORSPACE_HPP__
+
+#include "Vector.hpp"
+
+namespace Color
+{
+
+    class Lab;
+
+    class XYZ
+    {
+    public:
+        XYZ( float x, float y, float z );
+        XYZ( const v3b& rgb );
+        XYZ( const Lab& lab );
+
+        v3i RGB() const;
+
+        float x, y, z;
+    };
+
+    class Lab
+    {
+    public:
+        Lab();
+        Lab( float L, float a, float b );
+        Lab( const XYZ& xyz );
+        Lab( const v3b& rgb );
+
+        float L, a, b;
+    };
+
+}
+
+#endif
diff --git a/thirdparty/etcpak/DataProvider.cpp b/thirdparty/etcpak/DataProvider.cpp
new file mode 100644
index 0000000000..6bd4b105ed
--- /dev/null
+++ b/thirdparty/etcpak/DataProvider.cpp
@@ -0,0 +1,77 @@
+#include <assert.h>
+#include <utility>
+
+#include "BitmapDownsampled.hpp"
+#include "DataProvider.hpp"
+#include "MipMap.hpp"
+
+DataProvider::DataProvider( const char* fn, bool mipmap, bool bgr )
+    : m_offset( 0 )
+    , m_mipmap( mipmap )
+    , m_done( false )
+    , m_lines( 32 )
+{
+    m_bmp.emplace_back( new Bitmap( fn, m_lines, bgr ) );
+    m_current = m_bmp[0].get();
+}
+
+DataProvider::~DataProvider()
+{
+}
+
+unsigned int DataProvider::NumberOfParts() const
+{
+    unsigned int parts = ( ( m_bmp[0]->Size().y / 4 ) + m_lines - 1 ) / m_lines;
+
+    if( m_mipmap )
+    {
+        v2i current = m_bmp[0]->Size();
+        int levels = NumberOfMipLevels( current );
+        unsigned int lines = m_lines;
+        for( int i=1; i<levels; i++ )
+        {
+            assert( current.x != 1 || current.y != 1 );
+            current.x = std::max( 1, current.x / 2 );
+            current.y = std::max( 1, current.y / 2 );
+            lines *= 2;
+            parts += ( ( std::max( 4, current.y ) / 4 ) + lines - 1 ) / lines;
+        }
+        assert( current.x == 1 && current.y == 1 );
+    }
+
+    return parts;
+}
+
+DataPart DataProvider::NextPart()
+{
+    assert( !m_done );
+
+    unsigned int lines = m_lines;
+    bool done;
+
+    const auto ptr = m_current->NextBlock( lines, done );
+    DataPart ret = {
+        ptr,
+        std::max<unsigned int>( 4, m_current->Size().x ),
+        lines,
+        m_offset
+    };
+
+    m_offset += m_current->Size().x / 4 * lines;
+
+    if( done )
+    {
+        if( m_mipmap && ( m_current->Size().x != 1 || m_current->Size().y != 1 ) )
+        {
+            m_lines *= 2;
+            m_bmp.emplace_back( new BitmapDownsampled( *m_current, m_lines ) );
+            m_current = m_bmp[m_bmp.size()-1].get();
+        }
+        else
+        {
+            m_done = true;
+        }
+    }
+
+    return ret;
+}
diff --git a/thirdparty/etcpak/DataProvider.hpp b/thirdparty/etcpak/DataProvider.hpp
new file mode 100644
index 0000000000..e773801ed6
--- /dev/null
+++ b/thirdparty/etcpak/DataProvider.hpp
@@ -0,0 +1,41 @@
+#ifndef __DATAPROVIDER_HPP__
+#define __DATAPROVIDER_HPP__
+
+#include <memory>
+#include <stdint.h>
+#include <vector>
+
+#include "Bitmap.hpp"
+
+struct DataPart
+{
+    const uint32_t* src;
+    unsigned int width;
+    unsigned int lines;
+    unsigned int offset;
+};
+
+class DataProvider
+{
+public:
+    DataProvider( const char* fn, bool mipmap, bool bgr );
+    ~DataProvider();
+
+    unsigned int NumberOfParts() const;
+
+    DataPart NextPart();
+
+    bool Alpha() const { return m_bmp[0]->Alpha(); }
+    const v2i& Size() const { return m_bmp[0]->Size(); }
+    const Bitmap& ImageData() const { return *m_bmp[0]; }
+
+private:
+    std::vector<std::unique_ptr<Bitmap>> m_bmp;
+    Bitmap* m_current;
+    unsigned int m_offset;
+    unsigned int m_lines;
+    bool m_mipmap;
+    bool m_done;
+};
+
+#endif
diff --git a/thirdparty/etcpak/Debug.cpp b/thirdparty/etcpak/Debug.cpp
new file mode 100644
index 0000000000..72dc4e0526
--- /dev/null
+++ b/thirdparty/etcpak/Debug.cpp
@@ -0,0 +1,31 @@
+#include <algorithm>
+#include <vector>
+#include "Debug.hpp"
+
+static std::vector<DebugLog::Callback*> s_callbacks;
+
+void DebugLog::Message( const char* msg )
+{
+    for( auto it = s_callbacks.begin(); it != s_callbacks.end(); ++it )
+    {
+        (*it)->OnDebugMessage( msg );
+    }
+}
+
+void DebugLog::AddCallback( Callback* c )
+{
+    const auto it = std::find( s_callbacks.begin(), s_callbacks.end(), c );
+    if( it == s_callbacks.end() )
+    {
+        s_callbacks.push_back( c );
+    }
+}
+
+void DebugLog::RemoveCallback( Callback* c )
+{
+    const auto it = std::find( s_callbacks.begin(), s_callbacks.end(), c );
+    if( it != s_callbacks.end() )
+    {
+        s_callbacks.erase( it );
+    }
+}
diff --git a/thirdparty/etcpak/Debug.hpp b/thirdparty/etcpak/Debug.hpp
new file mode 100644
index 0000000000..524eaa7baf
--- /dev/null
+++ b/thirdparty/etcpak/Debug.hpp
@@ -0,0 +1,27 @@
+#ifndef __DARKRL__DEBUG_HPP__
+#define __DARKRL__DEBUG_HPP__
+
+#ifdef DEBUG
+#  include <sstream>
+#  define DBGPRINT(msg) { std::stringstream __buf; __buf << msg; DebugLog::Message( __buf.str().c_str() ); }
+#else
+#  define DBGPRINT(msg) ((void)0)
+#endif
+
+class DebugLog
+{
+public:
+    struct Callback
+    {
+        virtual void OnDebugMessage( const char* msg ) = 0;
+    };
+
+    static void Message( const char* msg );
+    static void AddCallback( Callback* c );
+    static void RemoveCallback( Callback* c );
+
+private:
+    DebugLog() {}
+};
+
+#endif
diff --git a/thirdparty/etcpak/Dither.cpp b/thirdparty/etcpak/Dither.cpp
new file mode 100644
index 0000000000..355686f26b
--- /dev/null
+++ b/thirdparty/etcpak/Dither.cpp
@@ -0,0 +1,120 @@
+#include <algorithm>
+#include <string.h>
+
+#include "Dither.hpp"
+#include "Math.hpp"
+#ifdef __SSE4_1__
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#    include <Windows.h>
+#  else
+#    include <x86intrin.h>
+#  endif
+#endif
+
+#ifdef __AVX2__
+void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
+{
+    static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 };
+    static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 };
+    static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 };
+    static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 };
+
+    const __m256i BayerAdd0 = _mm256_setr_epi8(
+        a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
+        a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
+    );
+    const __m256i BayerAdd1 = _mm256_setr_epi8(
+        a31[8],  a63[8],  a31[8],  0, a31[9],  a63[9],  a31[9],  0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
+        a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
+    );
+    const __m256i BayerSub0 = _mm256_setr_epi8(
+        s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
+        s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
+    );
+    const __m256i BayerSub1 = _mm256_setr_epi8(
+        s31[8],  s63[8],  s31[8],  0, s31[9],  s63[9],  s31[9],  0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
+        s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
+    );
+
+    __m256i l0 = _mm256_inserti128_si256( _mm256_castsi128_si256( px0 ), px1, 1 );
+    __m256i l1 = _mm256_inserti128_si256( _mm256_castsi128_si256( px2 ), px3, 1 );
+
+    __m256i a0 = _mm256_adds_epu8( l0, BayerAdd0 );
+    __m256i a1 = _mm256_adds_epu8( l1, BayerAdd1 );
+    __m256i s0 = _mm256_subs_epu8( a0, BayerSub0 );
+    __m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );
+
+    _mm256_storeu_si256( (__m256i*)(data   ), s0 );
+    _mm256_storeu_si256( (__m256i*)(data+32), s1 );
+
+}
+#endif
+
+void Dither( uint8_t* data )
+{
+#ifdef __AVX2__
+    static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 };
+    static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 };
+    static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 };
+    static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 };
+
+    const __m256i BayerAdd0 = _mm256_setr_epi8(
+        a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
+        a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
+    );
+    const __m256i BayerAdd1 = _mm256_setr_epi8(
+        a31[8],  a63[8],  a31[8],  0, a31[9],  a63[9],  a31[9],  0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
+        a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
+    );
+    const __m256i BayerSub0 = _mm256_setr_epi8(
+        s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
+        s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
+    );
+    const __m256i BayerSub1 = _mm256_setr_epi8(
+        s31[8],  s63[8],  s31[8],  0, s31[9],  s63[9],  s31[9],  0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
+        s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
+    );
+
+    __m256i px0 = _mm256_loadu_si256( (__m256i*)(data   ) );
+    __m256i px1 = _mm256_loadu_si256( (__m256i*)(data+32) );
+
+    __m256i a0 = _mm256_adds_epu8( px0, BayerAdd0 );
+    __m256i a1 = _mm256_adds_epu8( px1, BayerAdd1 );
+    __m256i s0 = _mm256_subs_epu8( a0, BayerSub0 );
+    __m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );
+
+    _mm256_storeu_si256( (__m256i*)(data   ), s0 );
+    _mm256_storeu_si256( (__m256i*)(data+32), s1 );
+#else
+    static constexpr int8_t Bayer31[16] = {
+        ( 0-8)*2/3, ( 8-8)*2/3, ( 2-8)*2/3, (10-8)*2/3,
+        (12-8)*2/3, ( 4-8)*2/3, (14-8)*2/3, ( 6-8)*2/3,
+        ( 3-8)*2/3, (11-8)*2/3, ( 1-8)*2/3, ( 9-8)*2/3,
+        (15-8)*2/3, ( 7-8)*2/3, (13-8)*2/3, ( 5-8)*2/3
+    };
+    static constexpr int8_t Bayer63[16] = {
+        ( 0-8)*2/6, ( 8-8)*2/6, ( 2-8)*2/6, (10-8)*2/6,
+        (12-8)*2/6, ( 4-8)*2/6, (14-8)*2/6, ( 6-8)*2/6,
+        ( 3-8)*2/6, (11-8)*2/6, ( 1-8)*2/6, ( 9-8)*2/6,
+        (15-8)*2/6, ( 7-8)*2/6, (13-8)*2/6, ( 5-8)*2/6
+    };
+
+    for( int i=0; i<16; i++ )
+    {
+        uint32_t col;
+        memcpy( &col, data, 4 );
+        uint8_t r = col & 0xFF;
+        uint8_t g = ( col >> 8 ) & 0xFF;
+        uint8_t b = ( col >> 16 ) & 0xFF;
+
+        r = clampu8( r + Bayer31[i] );
+        g = clampu8( g + Bayer63[i] );
+        b = clampu8( b + Bayer31[i] );
+
+        col = r | ( g << 8 ) | ( b << 16 );
+        memcpy( data, &col, 4 );
+        data += 4;
+    }
+#endif
+}
diff --git a/thirdparty/etcpak/Dither.hpp b/thirdparty/etcpak/Dither.hpp
new file mode 100644
index 0000000000..e43ce5676d
--- /dev/null
+++ b/thirdparty/etcpak/Dither.hpp
@@ -0,0 +1,21 @@
+#ifndef __DITHER_HPP__
+#define __DITHER_HPP__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __AVX2__
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#  else
+#    include <x86intrin.h>
+#  endif
+#endif
+
+void Dither( uint8_t* data );
+
+#ifdef __AVX2__
+void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 );
+#endif
+
+#endif
diff --git a/thirdparty/etcpak/Error.cpp b/thirdparty/etcpak/Error.cpp
new file mode 100644
index 0000000000..014ecdab66
--- /dev/null
+++ b/thirdparty/etcpak/Error.cpp
@@ -0,0 +1,48 @@
+#include <stdint.h>
+
+#include "Error.hpp"
+#include "Math.hpp"
+
+float CalcMSE3( const Bitmap& bmp, const Bitmap& out )
+{
+    float err = 0;
+
+    const uint32_t* p1 = bmp.Data();
+    const uint32_t* p2 = out.Data();
+    size_t cnt = bmp.Size().x * bmp.Size().y;
+
+    for( size_t i=0; i<cnt; i++ )
+    {
+        uint32_t c1 = *p1++;
+        uint32_t c2 = *p2++;
+
+        err += sq( ( c1 & 0x000000FF ) - ( c2 & 0x000000FF ) );
+        err += sq( ( ( c1 & 0x0000FF00 ) >> 8 ) - ( ( c2 & 0x0000FF00 ) >> 8 ) );
+        err += sq( ( ( c1 & 0x00FF0000 ) >> 16 ) - ( ( c2 & 0x00FF0000 ) >> 16 ) );
+    }
+
+    err /= cnt * 3;
+
+    return err;
+}
+
+float CalcMSE1( const Bitmap& bmp, const Bitmap& out )
+{
+    float err = 0;
+
+    const uint32_t* p1 = bmp.Data();
+    const uint32_t* p2 = out.Data();
+    size_t cnt = bmp.Size().x * bmp.Size().y;
+
+    for( size_t i=0; i<cnt; i++ )
+    {
+        uint32_t c1 = *p1++;
+        uint32_t c2 = *p2++;
+
+        err += sq( ( c1 >> 24 ) - ( c2 & 0xFF ) );
+    }
+
+    err /= cnt;
+
+    return err;
+}
diff --git a/thirdparty/etcpak/Error.hpp b/thirdparty/etcpak/Error.hpp
new file mode 100644
index 0000000000..9817754b74
--- /dev/null
+++ b/thirdparty/etcpak/Error.hpp
@@ -0,0 +1,9 @@
+#ifndef __ERROR_HPP__
+#define __ERROR_HPP__
+
+#include "Bitmap.hpp"
+
+float CalcMSE3( const Bitmap& bmp, const Bitmap& out );
+float CalcMSE1( const Bitmap& bmp, const Bitmap& out );
+
+#endif
diff --git a/thirdparty/etcpak/ForceInline.hpp b/thirdparty/etcpak/ForceInline.hpp
new file mode 100644
index 0000000000..b6f012841b
--- /dev/null
+++ b/thirdparty/etcpak/ForceInline.hpp
@@ -0,0 +1,20 @@
+#ifndef __FORCEINLINE_HPP__
+#define __FORCEINLINE_HPP__
+
+#if defined(__GNUC__)
+#  define etcpak_force_inline __attribute__((always_inline)) inline
+#elif defined(_MSC_VER)
+#  define etcpak_force_inline __forceinline
+#else
+#  define etcpak_force_inline inline
+#endif
+
+#if defined(__GNUC__)
+#  define etcpak_no_inline __attribute__((noinline))
+#elif defined(_MSC_VER)
+#  define etcpak_no_inline __declspec(noinline)
+#else
+#  define etcpak_no_inline
+#endif
+
+#endif
diff --git a/thirdparty/etcpak/LICENSE.txt b/thirdparty/etcpak/LICENSE.txt
new file mode 100644
index 0000000000..59e85d6ea5
--- /dev/null
+++ b/thirdparty/etcpak/LICENSE.txt
@@ -0,0 +1,26 @@
+etcpak, an extremely fast ETC compression utility (https://github.com/wolfpld/etcpak)
+
+Copyright (c) 2013-2021, Bartosz Taudul <wolf@nereid.pl>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the <organization> nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/thirdparty/etcpak/Math.hpp b/thirdparty/etcpak/Math.hpp
new file mode 100644
index 0000000000..994e1ac4ea
--- /dev/null
+++ b/thirdparty/etcpak/Math.hpp
@@ -0,0 +1,92 @@
+#ifndef __DARKRL__MATH_HPP__
+#define __DARKRL__MATH_HPP__
+
+#include <algorithm>
+#include <cmath>
+#include <stdint.h>
+
+#include "ForceInline.hpp"
+
+template<typename T>
+static etcpak_force_inline T AlignPOT( T val )
+{
+    if( val == 0 ) return 1;
+    val--;
+    for( unsigned int i=1; i<sizeof( T ) * 8; i <<= 1 )
+    {
+        val |= val >> i;
+    }
+    return val + 1;
+}
+
+static etcpak_force_inline int CountSetBits( uint32_t val )
+{
+    val -= ( val >> 1 ) & 0x55555555;
+    val = ( ( val >> 2 ) & 0x33333333 ) + ( val & 0x33333333 );
+    val = ( ( val >> 4 ) + val ) & 0x0f0f0f0f;
+    val += val >> 8;
+    val += val >> 16;
+    return val & 0x0000003f;
+}
+
+static etcpak_force_inline int CountLeadingZeros( uint32_t val )
+{
+    val |= val >> 1;
+    val |= val >> 2;
+    val |= val >> 4;
+    val |= val >> 8;
+    val |= val >> 16;
+    return 32 - CountSetBits( val );
+}
+
+static etcpak_force_inline float sRGB2linear( float v )
+{
+    const float a = 0.055f;
+    if( v <= 0.04045f )
+    {
+        return v / 12.92f;
+    }
+    else
+    {
+        return pow( ( v + a ) / ( 1 + a ), 2.4f );
+    }
+}
+
+static etcpak_force_inline float linear2sRGB( float v )
+{
+    const float a = 0.055f;
+    if( v <= 0.0031308f )
+    {
+        return 12.92f * v;
+    }
+    else
+    {
+        return ( 1 + a ) * pow( v, 1/2.4f ) - a;
+    }
+}
+
+template<class T>
+static etcpak_force_inline T SmoothStep( T x )
+{
+    return x*x*(3-2*x);
+}
+
+static etcpak_force_inline uint8_t clampu8( int32_t val )
+{
+    if( ( val & ~0xFF ) == 0 ) return val;
+    return ( ( ~val ) >> 31 ) & 0xFF;
+}
+
+template<class T>
+static etcpak_force_inline T sq( T val )
+{
+    return val * val;
+}
+
+static etcpak_force_inline int mul8bit( int a, int b )
+{
+    int t = a*b + 128;
+    return ( t + ( t >> 8 ) ) >> 8;
+}
+
+#endif
diff --git a/thirdparty/etcpak/MipMap.hpp b/thirdparty/etcpak/MipMap.hpp
new file mode 100644
index 0000000000..d3b4bc9e7c
--- /dev/null
+++ b/thirdparty/etcpak/MipMap.hpp
@@ -0,0 +1,11 @@
+#ifndef __MIPMAP_HPP__
+#define __MIPMAP_HPP__
+
+#include "Vector.hpp"
+
+inline int NumberOfMipLevels( const v2i& size )
+{
+    return (int)floor( log2( std::max( size.x, size.y ) ) ) + 1;
+}
+
+#endif
diff --git a/thirdparty/etcpak/ProcessCommon.hpp b/thirdparty/etcpak/ProcessCommon.hpp
new file mode 100644
index 0000000000..657d68888f
--- /dev/null
+++ b/thirdparty/etcpak/ProcessCommon.hpp
@@ -0,0 +1,50 @@
+#ifndef __PROCESSCOMMON_HPP__
+#define __PROCESSCOMMON_HPP__
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+template<class T>
+static size_t GetLeastError( const T* err, size_t num )
+{
+    size_t idx = 0;
+    for( size_t i=1; i<num; i++ )
+    {
+        if( err[i] < err[idx] )
+        {
+            idx = i;
+        }
+    }
+    return idx;
+}
+
+static uint64_t FixByteOrder( uint64_t d )
+{
+    return ( ( d & 0x00000000FFFFFFFF ) ) |
+           ( ( d & 0xFF00000000000000 ) >> 24 ) |
+           ( ( d & 0x000000FF00000000 ) << 24 ) |
+           ( ( d & 0x00FF000000000000 ) >> 8 ) |
+           ( ( d & 0x0000FF0000000000 ) << 8 );
+}
+
+template<class T, class S>
+static uint64_t EncodeSelectors( uint64_t d, const T terr[2][8], const S tsel[16][8], const uint32_t* id )
+{
+    size_t tidx[2];
+    tidx[0] = GetLeastError( terr[0], 8 );
+    tidx[1] = GetLeastError( terr[1], 8 );
+
+    d |= tidx[0] << 26;
+    d |= tidx[1] << 29;
+    for( int i=0; i<16; i++ )
+    {
+        uint64_t t = tsel[i][tidx[id[i]%2]];
+        d |= ( t & 0x1 ) << ( i + 32 );
+        d |= ( t & 0x2 ) << ( i + 47 );
+    }
+
+    return d;
+}
+
+#endif
diff --git a/thirdparty/etcpak/ProcessDxtc.cpp b/thirdparty/etcpak/ProcessDxtc.cpp
new file mode 100644
index 0000000000..508d55fd75
--- /dev/null
+++ b/thirdparty/etcpak/ProcessDxtc.cpp
@@ -0,0 +1,956 @@
+#include "Dither.hpp"
+#include "ForceInline.hpp"
+#include "ProcessDxtc.hpp"
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __ARM_NEON
+#  include <arm_neon.h>
+#endif
+
+#if defined __AVX__ && !defined __SSE4_1__
+#  define __SSE4_1__
+#endif
+
+#if defined __SSE4_1__ || defined __AVX2__
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#  else
+#    include <x86intrin.h>
+#    ifndef _mm256_cvtsi256_si32
+#      define _mm256_cvtsi256_si32( v ) ( _mm_cvtsi128_si32( _mm256_castsi256_si128( v ) ) )
+#    endif
+#  endif
+#endif
+
+
+static etcpak_force_inline uint16_t to565( uint8_t r, uint8_t g, uint8_t b )
+{
+    return ( ( r & 0xF8 ) << 8 ) | ( ( g & 0xFC ) << 3 ) | ( b >> 3 );
+}
+
+static etcpak_force_inline uint16_t to565( uint32_t c )
+{
+    return
+        ( ( c & 0xF80000 ) >> 19 ) |
+        ( ( c & 0x00FC00 ) >> 5 ) |
+        ( ( c & 0x0000F8 ) << 8 );
+}
+
+static const uint8_t DxtcIndexTable[256] = {
+    85,     87,     86,     84,     93,     95,     94,     92,     89,     91,     90,     88,     81,     83,     82,     80,
+    117,    119,    118,    116,    125,    127,    126,    124,    121,    123,    122,    120,    113,    115,    114,    112,
+    101,    103,    102,    100,    109,    111,    110,    108,    105,    107,    106,    104,    97,     99,     98,     96,
+    69,     71,     70,     68,     77,     79,     78,     76,     73,     75,     74,     72,     65,     67,     66,     64,
+    213,    215,    214,    212,    221,    223,    222,    220,    217,    219,    218,    216,    209,    211,    210,    208,
+    245,    247,    246,    244,    253,    255,    254,    252,    249,    251,    250,    248,    241,    243,    242,    240,
+    229,    231,    230,    228,    237,    239,    238,    236,    233,    235,    234,    232,    225,    227,    226,    224,
+    197,    199,    198,    196,    205,    207,    206,    204,    201,    203,    202,    200,    193,    195,    194,    192,
+    149,    151,    150,    148,    157,    159,    158,    156,    153,    155,    154,    152,    145,    147,    146,    144,
+    181,    183,    182,    180,    189,    191,    190,    188,    185,    187,    186,    184,    177,    179,    178,    176,
+    165,    167,    166,    164,    173,    175,    174,    172,    169,    171,    170,    168,    161,    163,    162,    160,
+    133,    135,    134,    132,    141,    143,    142,    140,    137,    139,    138,    136,    129,    131,    130,    128,
+    21,     23,     22,     20,     29,     31,     30,     28,     25,     27,     26,     24,     17,     19,     18,     16,
+    53,     55,     54,     52,     61,     63,     62,     60,     57,     59,     58,     56,     49,     51,     50,     48,
+    37,     39,     38,     36,     45,     47,     46,     44,     41,     43,     42,     40,     33,     35,     34,     32,
+    5,      7,      6,      4,      13,     15,     14,     12,     9,      11,     10,     8,      1,      3,      2,      0
+};
+
+static const uint8_t AlphaIndexTable_SSE[64] = {
+    9,      15,     14,     13,     12,     11,     10,     8,      57,     63,     62,     61,     60,     59,     58,     56,
+    49,     55,     54,     53,     52,     51,     50,     48,     41,     47,     46,     45,     44,     43,     42,     40,
+    33,     39,     38,     37,     36,     35,     34,     32,     25,     31,     30,     29,     28,     27,     26,     24,
+    17,     23,     22,     21,     20,     19,     18,     16,     1,      7,      6,      5,      4,      3,      2,      0,
+};
+
+static const uint16_t DivTable[255*3+1] = {
+    0xffff, 0xffff, 0xffff, 0xffff, 0xcccc, 0xaaaa, 0x9249, 0x8000, 0x71c7, 0x6666, 0x5d17, 0x5555, 0x4ec4, 0x4924, 0x4444, 0x4000,
+    0x3c3c, 0x38e3, 0x35e5, 0x3333, 0x30c3, 0x2e8b, 0x2c85, 0x2aaa, 0x28f5, 0x2762, 0x25ed, 0x2492, 0x234f, 0x2222, 0x2108, 0x2000,
+    0x1f07, 0x1e1e, 0x1d41, 0x1c71, 0x1bac, 0x1af2, 0x1a41, 0x1999, 0x18f9, 0x1861, 0x17d0, 0x1745, 0x16c1, 0x1642, 0x15c9, 0x1555,
+    0x14e5, 0x147a, 0x1414, 0x13b1, 0x1352, 0x12f6, 0x129e, 0x1249, 0x11f7, 0x11a7, 0x115b, 0x1111, 0x10c9, 0x1084, 0x1041, 0x1000,
+    0x0fc0, 0x0f83, 0x0f48, 0x0f0f, 0x0ed7, 0x0ea0, 0x0e6c, 0x0e38, 0x0e07, 0x0dd6, 0x0da7, 0x0d79, 0x0d4c, 0x0d20, 0x0cf6, 0x0ccc,
+    0x0ca4, 0x0c7c, 0x0c56, 0x0c30, 0x0c0c, 0x0be8, 0x0bc5, 0x0ba2, 0x0b81, 0x0b60, 0x0b40, 0x0b21, 0x0b02, 0x0ae4, 0x0ac7, 0x0aaa,
+    0x0a8e, 0x0a72, 0x0a57, 0x0a3d, 0x0a23, 0x0a0a, 0x09f1, 0x09d8, 0x09c0, 0x09a9, 0x0991, 0x097b, 0x0964, 0x094f, 0x0939, 0x0924,
+    0x090f, 0x08fb, 0x08e7, 0x08d3, 0x08c0, 0x08ad, 0x089a, 0x0888, 0x0876, 0x0864, 0x0853, 0x0842, 0x0831, 0x0820, 0x0810, 0x0800,
+    0x07f0, 0x07e0, 0x07d1, 0x07c1, 0x07b3, 0x07a4, 0x0795, 0x0787, 0x0779, 0x076b, 0x075d, 0x0750, 0x0743, 0x0736, 0x0729, 0x071c,
+    0x070f, 0x0703, 0x06f7, 0x06eb, 0x06df, 0x06d3, 0x06c8, 0x06bc, 0x06b1, 0x06a6, 0x069b, 0x0690, 0x0685, 0x067b, 0x0670, 0x0666,
+    0x065c, 0x0652, 0x0648, 0x063e, 0x0634, 0x062b, 0x0621, 0x0618, 0x060f, 0x0606, 0x05fd, 0x05f4, 0x05eb, 0x05e2, 0x05d9, 0x05d1,
+    0x05c9, 0x05c0, 0x05b8, 0x05b0, 0x05a8, 0x05a0, 0x0598, 0x0590, 0x0588, 0x0581, 0x0579, 0x0572, 0x056b, 0x0563, 0x055c, 0x0555,
+    0x054e, 0x0547, 0x0540, 0x0539, 0x0532, 0x052b, 0x0525, 0x051e, 0x0518, 0x0511, 0x050b, 0x0505, 0x04fe, 0x04f8, 0x04f2, 0x04ec,
+    0x04e6, 0x04e0, 0x04da, 0x04d4, 0x04ce, 0x04c8, 0x04c3, 0x04bd, 0x04b8, 0x04b2, 0x04ad, 0x04a7, 0x04a2, 0x049c, 0x0497, 0x0492,
+    0x048d, 0x0487, 0x0482, 0x047d, 0x0478, 0x0473, 0x046e, 0x0469, 0x0465, 0x0460, 0x045b, 0x0456, 0x0452, 0x044d, 0x0448, 0x0444,
+    0x043f, 0x043b, 0x0436, 0x0432, 0x042d, 0x0429, 0x0425, 0x0421, 0x041c, 0x0418, 0x0414, 0x0410, 0x040c, 0x0408, 0x0404, 0x0400,
+    0x03fc, 0x03f8, 0x03f4, 0x03f0, 0x03ec, 0x03e8, 0x03e4, 0x03e0, 0x03dd, 0x03d9, 0x03d5, 0x03d2, 0x03ce, 0x03ca, 0x03c7, 0x03c3,
+    0x03c0, 0x03bc, 0x03b9, 0x03b5, 0x03b2, 0x03ae, 0x03ab, 0x03a8, 0x03a4, 0x03a1, 0x039e, 0x039b, 0x0397, 0x0394, 0x0391, 0x038e,
+    0x038b, 0x0387, 0x0384, 0x0381, 0x037e, 0x037b, 0x0378, 0x0375, 0x0372, 0x036f, 0x036c, 0x0369, 0x0366, 0x0364, 0x0361, 0x035e,
+    0x035b, 0x0358, 0x0355, 0x0353, 0x0350, 0x034d, 0x034a, 0x0348, 0x0345, 0x0342, 0x0340, 0x033d, 0x033a, 0x0338, 0x0335, 0x0333,
+    0x0330, 0x032e, 0x032b, 0x0329, 0x0326, 0x0324, 0x0321, 0x031f, 0x031c, 0x031a, 0x0317, 0x0315, 0x0313, 0x0310, 0x030e, 0x030c,
+    0x0309, 0x0307, 0x0305, 0x0303, 0x0300, 0x02fe, 0x02fc, 0x02fa, 0x02f7, 0x02f5, 0x02f3, 0x02f1, 0x02ef, 0x02ec, 0x02ea, 0x02e8,
+    0x02e6, 0x02e4, 0x02e2, 0x02e0, 0x02de, 0x02dc, 0x02da, 0x02d8, 0x02d6, 0x02d4, 0x02d2, 0x02d0, 0x02ce, 0x02cc, 0x02ca, 0x02c8,
+    0x02c6, 0x02c4, 0x02c2, 0x02c0, 0x02be, 0x02bc, 0x02bb, 0x02b9, 0x02b7, 0x02b5, 0x02b3, 0x02b1, 0x02b0, 0x02ae, 0x02ac, 0x02aa,
+    0x02a8, 0x02a7, 0x02a5, 0x02a3, 0x02a1, 0x02a0, 0x029e, 0x029c, 0x029b, 0x0299, 0x0297, 0x0295, 0x0294, 0x0292, 0x0291, 0x028f,
+    0x028d, 0x028c, 0x028a, 0x0288, 0x0287, 0x0285, 0x0284, 0x0282, 0x0280, 0x027f, 0x027d, 0x027c, 0x027a, 0x0279, 0x0277, 0x0276,
+    0x0274, 0x0273, 0x0271, 0x0270, 0x026e, 0x026d, 0x026b, 0x026a, 0x0268, 0x0267, 0x0265, 0x0264, 0x0263, 0x0261, 0x0260, 0x025e,
+    0x025d, 0x025c, 0x025a, 0x0259, 0x0257, 0x0256, 0x0255, 0x0253, 0x0252, 0x0251, 0x024f, 0x024e, 0x024d, 0x024b, 0x024a, 0x0249,
+    0x0247, 0x0246, 0x0245, 0x0243, 0x0242, 0x0241, 0x0240, 0x023e, 0x023d, 0x023c, 0x023b, 0x0239, 0x0238, 0x0237, 0x0236, 0x0234,
+    0x0233, 0x0232, 0x0231, 0x0230, 0x022e, 0x022d, 0x022c, 0x022b, 0x022a, 0x0229, 0x0227, 0x0226, 0x0225, 0x0224, 0x0223, 0x0222,
+    0x0220, 0x021f, 0x021e, 0x021d, 0x021c, 0x021b, 0x021a, 0x0219, 0x0218, 0x0216, 0x0215, 0x0214, 0x0213, 0x0212, 0x0211, 0x0210,
+    0x020f, 0x020e, 0x020d, 0x020c, 0x020b, 0x020a, 0x0209, 0x0208, 0x0207, 0x0206, 0x0205, 0x0204, 0x0203, 0x0202, 0x0201, 0x0200,
+    0x01ff, 0x01fe, 0x01fd, 0x01fc, 0x01fb, 0x01fa, 0x01f9, 0x01f8, 0x01f7, 0x01f6, 0x01f5, 0x01f4, 0x01f3, 0x01f2, 0x01f1, 0x01f0,
+    0x01ef, 0x01ee, 0x01ed, 0x01ec, 0x01eb, 0x01ea, 0x01e9, 0x01e9, 0x01e8, 0x01e7, 0x01e6, 0x01e5, 0x01e4, 0x01e3, 0x01e2, 0x01e1,
+    0x01e0, 0x01e0, 0x01df, 0x01de, 0x01dd, 0x01dc, 0x01db, 0x01da, 0x01da, 0x01d9, 0x01d8, 0x01d7, 0x01d6, 0x01d5, 0x01d4, 0x01d4,
+    0x01d3, 0x01d2, 0x01d1, 0x01d0, 0x01cf, 0x01cf, 0x01ce, 0x01cd, 0x01cc, 0x01cb, 0x01cb, 0x01ca, 0x01c9, 0x01c8, 0x01c7, 0x01c7,
+    0x01c6, 0x01c5, 0x01c4, 0x01c3, 0x01c3, 0x01c2, 0x01c1, 0x01c0, 0x01c0, 0x01bf, 0x01be, 0x01bd, 0x01bd, 0x01bc, 0x01bb, 0x01ba,
+    0x01ba, 0x01b9, 0x01b8, 0x01b7, 0x01b7, 0x01b6, 0x01b5, 0x01b4, 0x01b4, 0x01b3, 0x01b2, 0x01b2, 0x01b1, 0x01b0, 0x01af, 0x01af,
+    0x01ae, 0x01ad, 0x01ad, 0x01ac, 0x01ab, 0x01aa, 0x01aa, 0x01a9, 0x01a8, 0x01a8, 0x01a7, 0x01a6, 0x01a6, 0x01a5, 0x01a4, 0x01a4,
+    0x01a3, 0x01a2, 0x01a2, 0x01a1, 0x01a0, 0x01a0, 0x019f, 0x019e, 0x019e, 0x019d, 0x019c, 0x019c, 0x019b, 0x019a, 0x019a, 0x0199,
+    0x0198, 0x0198, 0x0197, 0x0197, 0x0196, 0x0195, 0x0195, 0x0194, 0x0193, 0x0193, 0x0192, 0x0192, 0x0191, 0x0190, 0x0190, 0x018f,
+    0x018f, 0x018e, 0x018d, 0x018d, 0x018c, 0x018b, 0x018b, 0x018a, 0x018a, 0x0189, 0x0189, 0x0188, 0x0187, 0x0187, 0x0186, 0x0186,
+    0x0185, 0x0184, 0x0184, 0x0183, 0x0183, 0x0182, 0x0182, 0x0181, 0x0180, 0x0180, 0x017f, 0x017f, 0x017e, 0x017e, 0x017d, 0x017d,
+    0x017c, 0x017b, 0x017b, 0x017a, 0x017a, 0x0179, 0x0179, 0x0178, 0x0178, 0x0177, 0x0177, 0x0176, 0x0175, 0x0175, 0x0174, 0x0174,
+    0x0173, 0x0173, 0x0172, 0x0172, 0x0171, 0x0171, 0x0170, 0x0170, 0x016f, 0x016f, 0x016e, 0x016e, 0x016d, 0x016d, 0x016c, 0x016c,
+    0x016b, 0x016b, 0x016a, 0x016a, 0x0169, 0x0169, 0x0168, 0x0168, 0x0167, 0x0167, 0x0166, 0x0166, 0x0165, 0x0165, 0x0164, 0x0164,
+    0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c,
+    0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156
+};
+static const uint16_t DivTableNEON[255*3+1] = {
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x1c71, 0x1af2, 0x1999, 0x1861, 0x1745, 0x1642, 0x1555, 0x147a, 0x13b1, 0x12f6, 0x1249, 0x11a7, 0x1111, 0x1084, 0x1000,
+    0x0f83, 0x0f0f, 0x0ea0, 0x0e38, 0x0dd6, 0x0d79, 0x0d20, 0x0ccc, 0x0c7c, 0x0c30, 0x0be8, 0x0ba2, 0x0b60, 0x0b21, 0x0ae4, 0x0aaa,
+    0x0a72, 0x0a3d, 0x0a0a, 0x09d8, 0x09a9, 0x097b, 0x094f, 0x0924, 0x08fb, 0x08d3, 0x08ad, 0x0888, 0x0864, 0x0842, 0x0820, 0x0800,
+    0x07e0, 0x07c1, 0x07a4, 0x0787, 0x076b, 0x0750, 0x0736, 0x071c, 0x0703, 0x06eb, 0x06d3, 0x06bc, 0x06a6, 0x0690, 0x067b, 0x0666,
+    0x0652, 0x063e, 0x062b, 0x0618, 0x0606, 0x05f4, 0x05e2, 0x05d1, 0x05c0, 0x05b0, 0x05a0, 0x0590, 0x0581, 0x0572, 0x0563, 0x0555,
+    0x0547, 0x0539, 0x052b, 0x051e, 0x0511, 0x0505, 0x04f8, 0x04ec, 0x04e0, 0x04d4, 0x04c8, 0x04bd, 0x04b2, 0x04a7, 0x049c, 0x0492,
+    0x0487, 0x047d, 0x0473, 0x0469, 0x0460, 0x0456, 0x044d, 0x0444, 0x043b, 0x0432, 0x0429, 0x0421, 0x0418, 0x0410, 0x0408, 0x0400,
+    0x03f8, 0x03f0, 0x03e8, 0x03e0, 0x03d9, 0x03d2, 0x03ca, 0x03c3, 0x03bc, 0x03b5, 0x03ae, 0x03a8, 0x03a1, 0x039b, 0x0394, 0x038e,
+    0x0387, 0x0381, 0x037b, 0x0375, 0x036f, 0x0369, 0x0364, 0x035e, 0x0358, 0x0353, 0x034d, 0x0348, 0x0342, 0x033d, 0x0338, 0x0333,
+    0x032e, 0x0329, 0x0324, 0x031f, 0x031a, 0x0315, 0x0310, 0x030c, 0x0307, 0x0303, 0x02fe, 0x02fa, 0x02f5, 0x02f1, 0x02ec, 0x02e8,
+    0x02e4, 0x02e0, 0x02dc, 0x02d8, 0x02d4, 0x02d0, 0x02cc, 0x02c8, 0x02c4, 0x02c0, 0x02bc, 0x02b9, 0x02b5, 0x02b1, 0x02ae, 0x02aa,
+    0x02a7, 0x02a3, 0x02a0, 0x029c, 0x0299, 0x0295, 0x0292, 0x028f, 0x028c, 0x0288, 0x0285, 0x0282, 0x027f, 0x027c, 0x0279, 0x0276,
+    0x0273, 0x0270, 0x026d, 0x026a, 0x0267, 0x0264, 0x0261, 0x025e, 0x025c, 0x0259, 0x0256, 0x0253, 0x0251, 0x024e, 0x024b, 0x0249,
+    0x0246, 0x0243, 0x0241, 0x023e, 0x023c, 0x0239, 0x0237, 0x0234, 0x0232, 0x0230, 0x022d, 0x022b, 0x0229, 0x0226, 0x0224, 0x0222,
+    0x021f, 0x021d, 0x021b, 0x0219, 0x0216, 0x0214, 0x0212, 0x0210, 0x020e, 0x020c, 0x020a, 0x0208, 0x0206, 0x0204, 0x0202, 0x0200,
+    0x01fe, 0x01fc, 0x01fa, 0x01f8, 0x01f6, 0x01f4, 0x01f2, 0x01f0, 0x01ee, 0x01ec, 0x01ea, 0x01e9, 0x01e7, 0x01e5, 0x01e3, 0x01e1,
+    0x01e0, 0x01de, 0x01dc, 0x01da, 0x01d9, 0x01d7, 0x01d5, 0x01d4, 0x01d2, 0x01d0, 0x01cf, 0x01cd, 0x01cb, 0x01ca, 0x01c8, 0x01c7,
+    0x01c5, 0x01c3, 0x01c2, 0x01c0, 0x01bf, 0x01bd, 0x01bc, 0x01ba, 0x01b9, 0x01b7, 0x01b6, 0x01b4, 0x01b3, 0x01b2, 0x01b0, 0x01af,
+    0x01ad, 0x01ac, 0x01aa, 0x01a9, 0x01a8, 0x01a6, 0x01a5, 0x01a4, 0x01a2, 0x01a1, 0x01a0, 0x019e, 0x019d, 0x019c, 0x019a, 0x0199,
+    0x0198, 0x0197, 0x0195, 0x0194, 0x0193, 0x0192, 0x0190, 0x018f, 0x018e, 0x018d, 0x018b, 0x018a, 0x0189, 0x0188, 0x0187, 0x0186,
+    0x0184, 0x0183, 0x0182, 0x0181, 0x0180, 0x017f, 0x017e, 0x017d, 0x017b, 0x017a, 0x0179, 0x0178, 0x0177, 0x0176, 0x0175, 0x0174,
+    0x0173, 0x0172, 0x0171, 0x0170, 0x016f, 0x016e, 0x016d, 0x016c, 0x016b, 0x016a, 0x0169, 0x0168, 0x0167, 0x0166, 0x0165, 0x0164,
+    0x0163, 0x0162, 0x0161, 0x0160, 0x015f, 0x015e, 0x015d, 0x015c, 0x015b, 0x015a, 0x0159, 0x0158, 0x0158, 0x0157, 0x0156, 0x0155,
+    0x0154, 0x0153, 0x0152, 0x0151, 0x0150, 0x0150, 0x014f, 0x014e, 0x014d, 0x014c, 0x014b, 0x014a, 0x014a, 0x0149, 0x0148, 0x0147,
+    0x0146, 0x0146, 0x0145, 0x0144, 0x0143, 0x0142, 0x0142, 0x0141, 0x0140, 0x013f, 0x013e, 0x013e, 0x013d, 0x013c, 0x013b, 0x013b,
+    0x013a, 0x0139, 0x0138, 0x0138, 0x0137, 0x0136, 0x0135, 0x0135, 0x0134, 0x0133, 0x0132, 0x0132, 0x0131, 0x0130, 0x0130, 0x012f,
+    0x012e, 0x012e, 0x012d, 0x012c, 0x012b, 0x012b, 0x012a, 0x0129, 0x0129, 0x0128, 0x0127, 0x0127, 0x0126, 0x0125, 0x0125, 0x0124,
+    0x0123, 0x0123, 0x0122, 0x0121, 0x0121, 0x0120, 0x0120, 0x011f, 0x011e, 0x011e, 0x011d, 0x011c, 0x011c, 0x011b, 0x011b, 0x011a,
+    0x0119, 0x0119, 0x0118, 0x0118, 0x0117, 0x0116, 0x0116, 0x0115, 0x0115, 0x0114, 0x0113, 0x0113, 0x0112, 0x0112, 0x0111, 0x0111,
+    0x0110, 0x010f, 0x010f, 0x010e, 0x010e, 0x010d, 0x010d, 0x010c, 0x010c, 0x010b, 0x010a, 0x010a, 0x0109, 0x0109, 0x0108, 0x0108,
+    0x0107, 0x0107, 0x0106, 0x0106, 0x0105, 0x0105, 0x0104, 0x0104, 0x0103, 0x0103, 0x0102, 0x0102, 0x0101, 0x0101, 0x0100, 0x0100,
+    0x00ff, 0x00ff, 0x00fe, 0x00fe, 0x00fd, 0x00fd, 0x00fc, 0x00fc, 0x00fb, 0x00fb, 0x00fa, 0x00fa, 0x00f9, 0x00f9, 0x00f8, 0x00f8,
+    0x00f7, 0x00f7, 0x00f6, 0x00f6, 0x00f5, 0x00f5, 0x00f4, 0x00f4, 0x00f4, 0x00f3, 0x00f3, 0x00f2, 0x00f2, 0x00f1, 0x00f1, 0x00f0,
+    0x00f0, 0x00f0, 0x00ef, 0x00ef, 0x00ee, 0x00ee, 0x00ed, 0x00ed, 0x00ed, 0x00ec, 0x00ec, 0x00eb, 0x00eb, 0x00ea, 0x00ea, 0x00ea,
+    0x00e9, 0x00e9, 0x00e8, 0x00e8, 0x00e7, 0x00e7, 0x00e7, 0x00e6, 0x00e6, 0x00e5, 0x00e5, 0x00e5, 0x00e4, 0x00e4, 0x00e3, 0x00e3,
+    0x00e3, 0x00e2, 0x00e2, 0x00e1, 0x00e1, 0x00e1, 0x00e0, 0x00e0, 0x00e0, 0x00df, 0x00df, 0x00de, 0x00de, 0x00de, 0x00dd, 0x00dd,
+    0x00dd, 0x00dc, 0x00dc, 0x00db, 0x00db, 0x00db, 0x00da, 0x00da, 0x00da, 0x00d9, 0x00d9, 0x00d9, 0x00d8, 0x00d8, 0x00d7, 0x00d7,
+    0x00d7, 0x00d6, 0x00d6, 0x00d6, 0x00d5, 0x00d5, 0x00d5, 0x00d4, 0x00d4, 0x00d4, 0x00d3, 0x00d3, 0x00d3, 0x00d2, 0x00d2, 0x00d2,
+    0x00d1, 0x00d1, 0x00d1, 0x00d0, 0x00d0, 0x00d0, 0x00cf, 0x00cf, 0x00cf, 0x00ce, 0x00ce, 0x00ce, 0x00cd, 0x00cd, 0x00cd, 0x00cc,
+    0x00cc, 0x00cc, 0x00cb, 0x00cb, 0x00cb, 0x00ca, 0x00ca, 0x00ca, 0x00c9, 0x00c9, 0x00c9, 0x00c9, 0x00c8, 0x00c8, 0x00c8, 0x00c7,
+    0x00c7, 0x00c7, 0x00c6, 0x00c6, 0x00c6, 0x00c5, 0x00c5, 0x00c5, 0x00c5, 0x00c4, 0x00c4, 0x00c4, 0x00c3, 0x00c3, 0x00c3, 0x00c3,
+    0x00c2, 0x00c2, 0x00c2, 0x00c1, 0x00c1, 0x00c1, 0x00c1, 0x00c0, 0x00c0, 0x00c0, 0x00bf, 0x00bf, 0x00bf, 0x00bf, 0x00be, 0x00be,
+    0x00be, 0x00bd, 0x00bd, 0x00bd, 0x00bd, 0x00bc, 0x00bc, 0x00bc, 0x00bc, 0x00bb, 0x00bb, 0x00bb, 0x00ba, 0x00ba, 0x00ba, 0x00ba,
+    0x00b9, 0x00b9, 0x00b9, 0x00b9, 0x00b8, 0x00b8, 0x00b8, 0x00b8, 0x00b7, 0x00b7, 0x00b7, 0x00b7, 0x00b6, 0x00b6, 0x00b6, 0x00b6,
+    0x00b5, 0x00b5, 0x00b5, 0x00b5, 0x00b4, 0x00b4, 0x00b4, 0x00b4, 0x00b3, 0x00b3, 0x00b3, 0x00b3, 0x00b2, 0x00b2, 0x00b2, 0x00b2,
+    0x00b1, 0x00b1, 0x00b1, 0x00b1, 0x00b0, 0x00b0, 0x00b0, 0x00b0, 0x00af, 0x00af, 0x00af, 0x00af, 0x00ae, 0x00ae, 0x00ae, 0x00ae,
+    0x00ae, 0x00ad, 0x00ad, 0x00ad, 0x00ad, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ab, 0x00ab, 0x00ab, 0x00ab,
+};
+
+static const uint16_t DivTableAlpha[256] = {
+    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xe38e, 0xcccc, 0xba2e, 0xaaaa, 0x9d89, 0x9249, 0x8888, 0x8000,
+    0x7878, 0x71c7, 0x6bca, 0x6666, 0x6186, 0x5d17, 0x590b, 0x5555, 0x51eb, 0x4ec4, 0x4bda, 0x4924, 0x469e, 0x4444, 0x4210, 0x4000,
+    0x3e0f, 0x3c3c, 0x3a83, 0x38e3, 0x3759, 0x35e5, 0x3483, 0x3333, 0x31f3, 0x30c3, 0x2fa0, 0x2e8b, 0x2d82, 0x2c85, 0x2b93, 0x2aaa,
+    0x29cb, 0x28f5, 0x2828, 0x2762, 0x26a4, 0x25ed, 0x253c, 0x2492, 0x23ee, 0x234f, 0x22b6, 0x2222, 0x2192, 0x2108, 0x2082, 0x2000,
+    0x1f81, 0x1f07, 0x1e91, 0x1e1e, 0x1dae, 0x1d41, 0x1cd8, 0x1c71, 0x1c0e, 0x1bac, 0x1b4e, 0x1af2, 0x1a98, 0x1a41, 0x19ec, 0x1999,
+    0x1948, 0x18f9, 0x18ac, 0x1861, 0x1818, 0x17d0, 0x178a, 0x1745, 0x1702, 0x16c1, 0x1681, 0x1642, 0x1605, 0x15c9, 0x158e, 0x1555,
+    0x151d, 0x14e5, 0x14af, 0x147a, 0x1446, 0x1414, 0x13e2, 0x13b1, 0x1381, 0x1352, 0x1323, 0x12f6, 0x12c9, 0x129e, 0x1273, 0x1249,
+    0x121f, 0x11f7, 0x11cf, 0x11a7, 0x1181, 0x115b, 0x1135, 0x1111, 0x10ec, 0x10c9, 0x10a6, 0x1084, 0x1062, 0x1041, 0x1020, 0x1000,
+    0x0fe0, 0x0fc0, 0x0fa2, 0x0f83, 0x0f66, 0x0f48, 0x0f2b, 0x0f0f, 0x0ef2, 0x0ed7, 0x0ebb, 0x0ea0, 0x0e86, 0x0e6c, 0x0e52, 0x0e38,
+    0x0e1f, 0x0e07, 0x0dee, 0x0dd6, 0x0dbe, 0x0da7, 0x0d90, 0x0d79, 0x0d62, 0x0d4c, 0x0d36, 0x0d20, 0x0d0b, 0x0cf6, 0x0ce1, 0x0ccc,
+    0x0cb8, 0x0ca4, 0x0c90, 0x0c7c, 0x0c69, 0x0c56, 0x0c43, 0x0c30, 0x0c1e, 0x0c0c, 0x0bfa, 0x0be8, 0x0bd6, 0x0bc5, 0x0bb3, 0x0ba2,
+    0x0b92, 0x0b81, 0x0b70, 0x0b60, 0x0b50, 0x0b40, 0x0b30, 0x0b21, 0x0b11, 0x0b02, 0x0af3, 0x0ae4, 0x0ad6, 0x0ac7, 0x0ab8, 0x0aaa,
+    0x0a9c, 0x0a8e, 0x0a80, 0x0a72, 0x0a65, 0x0a57, 0x0a4a, 0x0a3d, 0x0a30, 0x0a23, 0x0a16, 0x0a0a, 0x09fd, 0x09f1, 0x09e4, 0x09d8,
+    0x09cc, 0x09c0, 0x09b4, 0x09a9, 0x099d, 0x0991, 0x0986, 0x097b, 0x0970, 0x0964, 0x095a, 0x094f, 0x0944, 0x0939, 0x092f, 0x0924,
+    0x091a, 0x090f, 0x0905, 0x08fb, 0x08f1, 0x08e7, 0x08dd, 0x08d3, 0x08ca, 0x08c0, 0x08b7, 0x08ad, 0x08a4, 0x089a, 0x0891, 0x0888,
+    0x087f, 0x0876, 0x086d, 0x0864, 0x085b, 0x0853, 0x084a, 0x0842, 0x0839, 0x0831, 0x0828, 0x0820, 0x0818, 0x0810, 0x0808, 0x0800,
+};
+
+static etcpak_force_inline uint64_t ProcessRGB( const uint8_t* src )
+{
+#ifdef __SSE4_1__
+    __m128i px0 = _mm_loadu_si128(((__m128i*)src) + 0);
+    __m128i px1 = _mm_loadu_si128(((__m128i*)src) + 1);
+    __m128i px2 = _mm_loadu_si128(((__m128i*)src) + 2);
+    __m128i px3 = _mm_loadu_si128(((__m128i*)src) + 3);
+
+    __m128i smask = _mm_set1_epi32( 0xF8FCF8 );
+    __m128i sd0 = _mm_and_si128( px0, smask );
+    __m128i sd1 = _mm_and_si128( px1, smask );
+    __m128i sd2 = _mm_and_si128( px2, smask );
+    __m128i sd3 = _mm_and_si128( px3, smask );
+
+    __m128i sc = _mm_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
+
+    __m128i sc0 = _mm_cmpeq_epi8(sd0, sc);
+    __m128i sc1 = _mm_cmpeq_epi8(sd1, sc);
+    __m128i sc2 = _mm_cmpeq_epi8(sd2, sc);
+    __m128i sc3 = _mm_cmpeq_epi8(sd3, sc);
+
+    __m128i sm0 = _mm_and_si128(sc0, sc1);
+    __m128i sm1 = _mm_and_si128(sc2, sc3);
+    __m128i sm = _mm_and_si128(sm0, sm1);
+
+    if( _mm_testc_si128(sm, _mm_set1_epi32(-1)) )
+    {
+        uint32_t c;
+        memcpy( &c, src, 4 );
+        return uint64_t( to565( c ) ) << 16;
+    }
+
+    __m128i min0 = _mm_min_epu8( px0, px1 );
+    __m128i min1 = _mm_min_epu8( px2, px3 );
+    __m128i min2 = _mm_min_epu8( min0, min1 );
+
+    __m128i max0 = _mm_max_epu8( px0, px1 );
+    __m128i max1 = _mm_max_epu8( px2, px3 );
+    __m128i max2 = _mm_max_epu8( max0, max1 );
+
+    __m128i min3 = _mm_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i max3 = _mm_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i min4 = _mm_min_epu8( min2, min3 );
+    __m128i max4 = _mm_max_epu8( max2, max3 );
+
+    __m128i min5 = _mm_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i max5 = _mm_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i rmin = _mm_min_epu8( min4, min5 );
+    __m128i rmax = _mm_max_epu8( max4, max5 );
+
+    __m128i range1 = _mm_subs_epu8( rmax, rmin );
+    __m128i range2 = _mm_sad_epu8( rmax, rmin );
+
+    uint32_t vrange = _mm_cvtsi128_si32( range2 ) >> 1;
+    __m128i range = _mm_set1_epi16( DivTable[vrange] );
+
+    __m128i inset1 = _mm_srli_epi16( range1, 4 );
+    __m128i inset = _mm_and_si128( inset1, _mm_set1_epi8( 0xF ) );
+    __m128i min = _mm_adds_epu8( rmin, inset );
+    __m128i max = _mm_subs_epu8( rmax, inset );
+
+    __m128i c0 = _mm_subs_epu8( px0, rmin );
+    __m128i c1 = _mm_subs_epu8( px1, rmin );
+    __m128i c2 = _mm_subs_epu8( px2, rmin );
+    __m128i c3 = _mm_subs_epu8( px3, rmin );
+
+    __m128i is0 = _mm_maddubs_epi16( c0, _mm_set1_epi8( 1 ) );
+    __m128i is1 = _mm_maddubs_epi16( c1, _mm_set1_epi8( 1 ) );
+    __m128i is2 = _mm_maddubs_epi16( c2, _mm_set1_epi8( 1 ) );
+    __m128i is3 = _mm_maddubs_epi16( c3, _mm_set1_epi8( 1 ) );
+
+    __m128i s0 = _mm_hadd_epi16( is0, is1 );
+    __m128i s1 = _mm_hadd_epi16( is2, is3 );
+
+    __m128i m0 = _mm_mulhi_epu16( s0, range );
+    __m128i m1 = _mm_mulhi_epu16( s1, range );
+
+    __m128i p0 = _mm_packus_epi16( m0, m1 );
+
+    __m128i p1 = _mm_or_si128( _mm_srai_epi32( p0, 6 ), _mm_srai_epi32( p0, 12 ) );
+    __m128i p2 = _mm_or_si128( _mm_srai_epi32( p0, 18 ), p0 );
+    __m128i p3 = _mm_or_si128( p1, p2 );
+    __m128i p =_mm_shuffle_epi8( p3, _mm_set1_epi32( 0x0C080400 ) );
+
+    uint32_t vmin = _mm_cvtsi128_si32( min );
+    uint32_t vmax = _mm_cvtsi128_si32( max );
+    uint32_t vp = _mm_cvtsi128_si32( p );
+
+    return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
+#elif defined __ARM_NEON
+#  ifdef __aarch64__
+    uint8x16x4_t px = vld4q_u8( src );
+
+    uint8x16_t lr = px.val[0];
+    uint8x16_t lg = px.val[1];
+    uint8x16_t lb = px.val[2];
+
+    uint8_t rmaxr = vmaxvq_u8( lr );
+    uint8_t rmaxg = vmaxvq_u8( lg );
+    uint8_t rmaxb = vmaxvq_u8( lb );
+
+    uint8_t rminr = vminvq_u8( lr );
+    uint8_t rming = vminvq_u8( lg );
+    uint8_t rminb = vminvq_u8( lb );
+
+    int rr = rmaxr - rminr;
+    int rg = rmaxg - rming;
+    int rb = rmaxb - rminb;
+
+    int vrange1 = rr + rg + rb;
+    uint16_t vrange2 = DivTableNEON[vrange1];
+
+    uint8_t insetr = rr >> 4;
+    uint8_t insetg = rg >> 4;
+    uint8_t insetb = rb >> 4;
+
+    uint8_t minr = rminr + insetr;
+    uint8_t ming = rming + insetg;
+    uint8_t minb = rminb + insetb;
+
+    uint8_t maxr = rmaxr - insetr;
+    uint8_t maxg = rmaxg - insetg;
+    uint8_t maxb = rmaxb - insetb;
+
+    uint8x16_t cr = vsubq_u8( lr, vdupq_n_u8( rminr ) );
+    uint8x16_t cg = vsubq_u8( lg, vdupq_n_u8( rming ) );
+    uint8x16_t cb = vsubq_u8( lb, vdupq_n_u8( rminb ) );
+
+    uint16x8_t is0l = vaddl_u8( vget_low_u8( cr ), vget_low_u8( cg ) );
+    uint16x8_t is0h = vaddl_u8( vget_high_u8( cr ), vget_high_u8( cg ) );
+    uint16x8_t is1l = vaddw_u8( is0l, vget_low_u8( cb ) );
+    uint16x8_t is1h = vaddw_u8( is0h, vget_high_u8( cb ) );
+
+    int16x8_t range = vdupq_n_s16( vrange2 );
+    uint16x8_t m0 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( is1l ), range ) );
+    uint16x8_t m1 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( is1h ), range ) );
+
+    uint8x8_t p00 = vmovn_u16( m0 );
+    uint8x8_t p01 = vmovn_u16( m1 );
+    uint8x16_t p0 = vcombine_u8( p00, p01 );
+
+    uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) );
+    uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) );
+    uint32x4_t p3 = vaddq_u32( p1, p2 );
+
+    uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) );
+    uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) );
+
+    uint32_t vp;
+    vst1_lane_u32( &vp, vreinterpret_u32_u8( p.val[0] ), 0 );
+
+    return uint64_t( ( uint64_t( to565( minr, ming, minb ) ) << 16 ) | to565( maxr, maxg, maxb ) | ( uint64_t( vp ) << 32 ) );
+#  else
+    uint32x4_t px0 = vld1q_u32( (uint32_t*)src );
+    uint32x4_t px1 = vld1q_u32( (uint32_t*)src + 4 );
+    uint32x4_t px2 = vld1q_u32( (uint32_t*)src + 8 );
+    uint32x4_t px3 = vld1q_u32( (uint32_t*)src + 12 );
+
+    uint32x4_t smask = vdupq_n_u32( 0xF8FCF8 );
+    uint32x4_t sd0 = vandq_u32( smask, px0 );
+    uint32x4_t sd1 = vandq_u32( smask, px1 );
+    uint32x4_t sd2 = vandq_u32( smask, px2 );
+    uint32x4_t sd3 = vandq_u32( smask, px3 );
+
+    uint32x4_t sc = vdupq_n_u32( sd0[0] );
+
+    uint32x4_t sc0 = vceqq_u32( sd0, sc );
+    uint32x4_t sc1 = vceqq_u32( sd1, sc );
+    uint32x4_t sc2 = vceqq_u32( sd2, sc );
+    uint32x4_t sc3 = vceqq_u32( sd3, sc );
+
+    uint32x4_t sm0 = vandq_u32( sc0, sc1 );
+    uint32x4_t sm1 = vandq_u32( sc2, sc3 );
+    int64x2_t sm = vreinterpretq_s64_u32( vandq_u32( sm0, sm1 ) );
+
+    if( sm[0] == -1 && sm[1] == -1 )
+    {
+        return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
+    }
+
+    uint32x4_t mask = vdupq_n_u32( 0xFFFFFF );
+    uint8x16_t l0 = vreinterpretq_u8_u32( vandq_u32( mask, px0 ) );
+    uint8x16_t l1 = vreinterpretq_u8_u32( vandq_u32( mask, px1 ) );
+    uint8x16_t l2 = vreinterpretq_u8_u32( vandq_u32( mask, px2 ) );
+    uint8x16_t l3 = vreinterpretq_u8_u32( vandq_u32( mask, px3 ) );
+
+    uint8x16_t min0 = vminq_u8( l0, l1 );
+    uint8x16_t min1 = vminq_u8( l2, l3 );
+    uint8x16_t min2 = vminq_u8( min0, min1 );
+
+    uint8x16_t max0 = vmaxq_u8( l0, l1 );
+    uint8x16_t max1 = vmaxq_u8( l2, l3 );
+    uint8x16_t max2 = vmaxq_u8( max0, max1 );
+
+    uint8x16_t min3 = vreinterpretq_u8_u32( vrev64q_u32( vreinterpretq_u32_u8( min2 ) ) );
+    uint8x16_t max3 = vreinterpretq_u8_u32( vrev64q_u32( vreinterpretq_u32_u8( max2 ) ) );
+
+    uint8x16_t min4 = vminq_u8( min2, min3 );
+    uint8x16_t max4 = vmaxq_u8( max2, max3 );
+
+    uint8x16_t min5 = vcombine_u8( vget_high_u8( min4 ), vget_low_u8( min4 ) );
+    uint8x16_t max5 = vcombine_u8( vget_high_u8( max4 ), vget_low_u8( max4 ) );
+
+    uint8x16_t rmin = vminq_u8( min4, min5 );
+    uint8x16_t rmax = vmaxq_u8( max4, max5 );
+
+    uint8x16_t range1 = vsubq_u8( rmax, rmin );
+    uint8x8_t range2 = vget_low_u8( range1 );
+    uint8x8x2_t range3 = vzip_u8( range2, vdup_n_u8( 0 ) );
+    uint16x4_t range4 = vreinterpret_u16_u8( range3.val[0] );
+
+    uint16_t vrange1;
+    uint16x4_t range5 = vpadd_u16( range4, range4 );
+    uint16x4_t range6 = vpadd_u16( range5, range5 );
+    vst1_lane_u16( &vrange1, range6, 0 );
+
+    uint32_t vrange2 = ( 2 << 16 ) / uint32_t( vrange1 + 1 );
+    uint16x8_t range = vdupq_n_u16( vrange2 );
+
+    uint8x16_t inset = vshrq_n_u8( range1, 4 );
+    uint8x16_t min = vaddq_u8( rmin, inset );
+    uint8x16_t max = vsubq_u8( rmax, inset );
+
+    uint8x16_t c0 = vsubq_u8( l0, rmin );
+    uint8x16_t c1 = vsubq_u8( l1, rmin );
+    uint8x16_t c2 = vsubq_u8( l2, rmin );
+    uint8x16_t c3 = vsubq_u8( l3, rmin );
+
+    uint16x8_t is0 = vpaddlq_u8( c0 );
+    uint16x8_t is1 = vpaddlq_u8( c1 );
+    uint16x8_t is2 = vpaddlq_u8( c2 );
+    uint16x8_t is3 = vpaddlq_u8( c3 );
+
+    uint16x4_t is4 = vpadd_u16( vget_low_u16( is0 ), vget_high_u16( is0 ) );
+    uint16x4_t is5 = vpadd_u16( vget_low_u16( is1 ), vget_high_u16( is1 ) );
+    uint16x4_t is6 = vpadd_u16( vget_low_u16( is2 ), vget_high_u16( is2 ) );
+    uint16x4_t is7 = vpadd_u16( vget_low_u16( is3 ), vget_high_u16( is3 ) );
+
+    uint16x8_t s0 = vcombine_u16( is4, is5 );
+    uint16x8_t s1 = vcombine_u16( is6, is7 );
+
+    uint16x8_t m0 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( s0 ), vreinterpretq_s16_u16( range ) ) );
+    uint16x8_t m1 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( s1 ), vreinterpretq_s16_u16( range ) ) );
+
+    uint8x8_t p00 = vmovn_u16( m0 );
+    uint8x8_t p01 = vmovn_u16( m1 );
+    uint8x16_t p0 = vcombine_u8( p00, p01 );
+
+    uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) );
+    uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) );
+    uint32x4_t p3 = vaddq_u32( p1, p2 );
+
+    uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) );
+    uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) );
+
+    uint32_t vmin, vmax, vp;
+    vst1q_lane_u32( &vmin, vreinterpretq_u32_u8( min ), 0 );
+    vst1q_lane_u32( &vmax, vreinterpretq_u32_u8( max ), 0 );
+    vst1_lane_u32( &vp, vreinterpret_u32_u8( p.val[0] ), 0 );
+
+    return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
+#  endif
+#else
+    uint32_t ref;
+    memcpy( &ref, src, 4 );
+    uint32_t refMask = ref & 0xF8FCF8;
+    auto stmp = src + 4;
+    for( int i=1; i<16; i++ )
+    {
+        uint32_t px;
+        memcpy( &px, stmp, 4 );
+        if( ( px & 0xF8FCF8 ) != refMask ) break;
+        stmp += 4;
+    }
+    if( stmp == src + 64 )
+    {
+        return uint64_t( to565( ref ) ) << 16;
+    }
+
+    uint8_t min[3] = { src[0], src[1], src[2] };
+    uint8_t max[3] = { src[0], src[1], src[2] };
+    auto tmp = src + 4;
+    for( int i=1; i<16; i++ )
+    {
+        for( int j=0; j<3; j++ )
+        {
+            if( tmp[j] < min[j] ) min[j] = tmp[j];
+            else if( tmp[j] > max[j] ) max[j] = tmp[j];
+        }
+        tmp += 4;
+    }
+
+    const uint32_t range = DivTable[max[0] - min[0] + max[1] - min[1] + max[2] - min[2]];
+    const uint32_t rmin = min[0] + min[1] + min[2];
+    for( int i=0; i<3; i++ )
+    {
+        const uint8_t inset = ( max[i] - min[i] ) >> 4;
+        min[i] += inset;
+        max[i] -= inset;
+    }
+
+    uint32_t data = 0;
+    for( int i=0; i<16; i++ )
+    {
+        const uint32_t c = src[0] + src[1] + src[2] - rmin;
+        const uint8_t idx = ( c * range ) >> 16;
+        data |= idx << (i*2);
+        src += 4;
+    }
+
+    return uint64_t( ( uint64_t( to565( min[0], min[1], min[2] ) ) << 16 ) | to565( max[0], max[1], max[2] ) | ( uint64_t( data ) << 32 ) );
+#endif
+}
+
+#ifdef __AVX2__
+static etcpak_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
+{
+    __m256i px0 = _mm256_loadu_si256(((__m256i*)src) + 0);
+    __m256i px1 = _mm256_loadu_si256(((__m256i*)src) + 1);
+    __m256i px2 = _mm256_loadu_si256(((__m256i*)src) + 2);
+    __m256i px3 = _mm256_loadu_si256(((__m256i*)src) + 3);
+
+    __m256i smask = _mm256_set1_epi32( 0xF8FCF8 );
+    __m256i sd0 = _mm256_and_si256( px0, smask );
+    __m256i sd1 = _mm256_and_si256( px1, smask );
+    __m256i sd2 = _mm256_and_si256( px2, smask );
+    __m256i sd3 = _mm256_and_si256( px3, smask );
+
+    __m256i sc = _mm256_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
+
+    __m256i sc0 = _mm256_cmpeq_epi8(sd0, sc);
+    __m256i sc1 = _mm256_cmpeq_epi8(sd1, sc);
+    __m256i sc2 = _mm256_cmpeq_epi8(sd2, sc);
+    __m256i sc3 = _mm256_cmpeq_epi8(sd3, sc);
+
+    __m256i sm0 = _mm256_and_si256(sc0, sc1);
+    __m256i sm1 = _mm256_and_si256(sc2, sc3);
+    __m256i sm = _mm256_and_si256(sm0, sm1);
+
+    const int64_t solid0 = 1 - _mm_testc_si128( _mm256_castsi256_si128( sm ), _mm_set1_epi32( -1 ) );
+    const int64_t solid1 = 1 - _mm_testc_si128( _mm256_extracti128_si256( sm, 1 ), _mm_set1_epi32( -1 ) );
+
+    if( solid0 + solid1 == 0 )
+    {
+        const auto c0 = uint64_t( to565( src[0], src[1], src[2] ) );
+        const auto c1 = uint64_t( to565( src[16], src[17], src[18] ) );
+        memcpy( dst, &c0, 8 );
+        memcpy( dst+8, &c1, 8 );
+        dst += 16;
+        return;
+    }
+
+    __m256i min0 = _mm256_min_epu8( px0, px1 );
+    __m256i min1 = _mm256_min_epu8( px2, px3 );
+    __m256i min2 = _mm256_min_epu8( min0, min1 );
+
+    __m256i max0 = _mm256_max_epu8( px0, px1 );
+    __m256i max1 = _mm256_max_epu8( px2, px3 );
+    __m256i max2 = _mm256_max_epu8( max0, max1 );
+
+    __m256i min3 = _mm256_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m256i max3 = _mm256_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m256i min4 = _mm256_min_epu8( min2, min3 );
+    __m256i max4 = _mm256_max_epu8( max2, max3 );
+
+    __m256i min5 = _mm256_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m256i max5 = _mm256_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m256i rmin = _mm256_min_epu8( min4, min5 );
+    __m256i rmax = _mm256_max_epu8( max4, max5 );
+
+    __m256i range1 = _mm256_subs_epu8( rmax, rmin );
+    __m256i range2 = _mm256_sad_epu8( rmax, rmin );
+
+    uint16_t vrange0 = DivTable[_mm256_cvtsi256_si32( range2 ) >> 1];
+    uint16_t vrange1 = DivTable[_mm256_extract_epi16( range2, 8 ) >> 1];
+    __m256i range00 = _mm256_set1_epi16( vrange0 );
+    __m256i range = _mm256_inserti128_si256( range00, _mm_set1_epi16( vrange1 ), 1 );
+
+    __m256i inset1 = _mm256_srli_epi16( range1, 4 );
+    __m256i inset = _mm256_and_si256( inset1, _mm256_set1_epi8( 0xF ) );
+    __m256i min = _mm256_adds_epu8( rmin, inset );
+    __m256i max = _mm256_subs_epu8( rmax, inset );
+
+    __m256i c0 = _mm256_subs_epu8( px0, rmin );
+    __m256i c1 = _mm256_subs_epu8( px1, rmin );
+    __m256i c2 = _mm256_subs_epu8( px2, rmin );
+    __m256i c3 = _mm256_subs_epu8( px3, rmin );
+
+    __m256i is0 = _mm256_maddubs_epi16( c0, _mm256_set1_epi8( 1 ) );
+    __m256i is1 = _mm256_maddubs_epi16( c1, _mm256_set1_epi8( 1 ) );
+    __m256i is2 = _mm256_maddubs_epi16( c2, _mm256_set1_epi8( 1 ) );
+    __m256i is3 = _mm256_maddubs_epi16( c3, _mm256_set1_epi8( 1 ) );
+
+    __m256i s0 = _mm256_hadd_epi16( is0, is1 );
+    __m256i s1 = _mm256_hadd_epi16( is2, is3 );
+
+    __m256i m0 = _mm256_mulhi_epu16( s0, range );
+    __m256i m1 = _mm256_mulhi_epu16( s1, range );
+
+    __m256i p0 = _mm256_packus_epi16( m0, m1 );
+
+    __m256i p1 = _mm256_or_si256( _mm256_srai_epi32( p0, 6 ), _mm256_srai_epi32( p0, 12 ) );
+    __m256i p2 = _mm256_or_si256( _mm256_srai_epi32( p0, 18 ), p0 );
+    __m256i p3 = _mm256_or_si256( p1, p2 );
+    __m256i p =_mm256_shuffle_epi8( p3, _mm256_set1_epi32( 0x0C080400 ) );
+
+    __m256i mm0 = _mm256_unpacklo_epi8( _mm256_setzero_si256(), min );
+    __m256i mm1 = _mm256_unpacklo_epi8( _mm256_setzero_si256(), max );
+    __m256i mm2 = _mm256_unpacklo_epi64( mm1, mm0 );
+    __m256i mmr = _mm256_slli_epi64( _mm256_srli_epi64( mm2, 11 ), 11 );
+    __m256i mmg = _mm256_slli_epi64( _mm256_srli_epi64( mm2, 26 ), 5 );
+    __m256i mmb = _mm256_srli_epi64( _mm256_slli_epi64( mm2, 16 ), 59 );
+    __m256i mm3 = _mm256_or_si256( mmr, mmg );
+    __m256i mm4 = _mm256_or_si256( mm3, mmb );
+    __m256i mm5 = _mm256_shuffle_epi8( mm4, _mm256_set1_epi32( 0x09080100 ) );
+
+    __m256i d0 = _mm256_unpacklo_epi32( mm5, p );
+    __m256i d1 = _mm256_permute4x64_epi64( d0, _MM_SHUFFLE( 3, 2, 2, 0 ) );
+    __m128i d2 = _mm256_castsi256_si128( d1 );
+
+    __m128i mask = _mm_set_epi64x( 0xFFFF0000 | -solid1, 0xFFFF0000 | -solid0 );
+    __m128i d3 = _mm_and_si128( d2, mask );
+    _mm_storeu_si128( (__m128i*)dst, d3 );
+
+    for( int j=4; j<8; j++ ) dst[j] = (char)DxtcIndexTable[(uint8_t)dst[j]];
+    for( int j=12; j<16; j++ ) dst[j] = (char)DxtcIndexTable[(uint8_t)dst[j]];
+
+    dst += 16;
+}
+#endif
+
+static const uint8_t AlphaIndexTable[8] = { 1, 7, 6, 5, 4, 3, 2, 0 };
+
+static etcpak_force_inline uint64_t ProcessAlpha( const uint8_t* src )
+{
+    uint8_t solid8 = *src;
+    uint16_t solid16 = uint16_t( solid8 ) | ( uint16_t( solid8 ) << 8 );
+    uint32_t solid32 = uint32_t( solid16 ) | ( uint32_t( solid16 ) << 16 );
+    uint64_t solid64 = uint64_t( solid32 ) | ( uint64_t( solid32 ) << 32 );
+    if( memcmp( src, &solid64, 8 ) == 0 && memcmp( src+8, &solid64, 8 ) == 0 )
+    {
+        return solid8;
+    }
+
+    uint8_t min = src[0];
+    uint8_t max = min;
+    for( int i=1; i<16; i++ )
+    {
+        const auto v = src[i];
+        if( v > max ) max = v;
+        else if( v < min ) min = v;
+    }
+
+    uint32_t range = ( 8 << 13 ) / ( 1 + max - min );
+    uint64_t data = 0;
+    for( int i=0; i<16; i++ )
+    {
+        uint8_t a = src[i] - min;
+        uint64_t idx = AlphaIndexTable[( a * range ) >> 13];
+        data |= idx << (i*3);
+    }
+
+    return max | ( min << 8 ) | ( data << 16 );
+}
+
+#ifdef __SSE4_1__
+static etcpak_force_inline uint64_t ProcessRGB_SSE( __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
+{
+    __m128i smask = _mm_set1_epi32( 0xF8FCF8 );
+    __m128i sd0 = _mm_and_si128( px0, smask );
+    __m128i sd1 = _mm_and_si128( px1, smask );
+    __m128i sd2 = _mm_and_si128( px2, smask );
+    __m128i sd3 = _mm_and_si128( px3, smask );
+
+    __m128i sc = _mm_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
+
+    __m128i sc0 = _mm_cmpeq_epi8(sd0, sc);
+    __m128i sc1 = _mm_cmpeq_epi8(sd1, sc);
+    __m128i sc2 = _mm_cmpeq_epi8(sd2, sc);
+    __m128i sc3 = _mm_cmpeq_epi8(sd3, sc);
+
+    __m128i sm0 = _mm_and_si128(sc0, sc1);
+    __m128i sm1 = _mm_and_si128(sc2, sc3);
+    __m128i sm = _mm_and_si128(sm0, sm1);
+
+    if( _mm_testc_si128(sm, _mm_set1_epi32(-1)) )
+    {
+        return uint64_t( to565( _mm_cvtsi128_si32( px0 ) ) ) << 16;
+    }
+
+    px0 = _mm_and_si128( px0, _mm_set1_epi32( 0xFFFFFF ) );
+    px1 = _mm_and_si128( px1, _mm_set1_epi32( 0xFFFFFF ) );
+    px2 = _mm_and_si128( px2, _mm_set1_epi32( 0xFFFFFF ) );
+    px3 = _mm_and_si128( px3, _mm_set1_epi32( 0xFFFFFF ) );
+
+    __m128i min0 = _mm_min_epu8( px0, px1 );
+    __m128i min1 = _mm_min_epu8( px2, px3 );
+    __m128i min2 = _mm_min_epu8( min0, min1 );
+
+    __m128i max0 = _mm_max_epu8( px0, px1 );
+    __m128i max1 = _mm_max_epu8( px2, px3 );
+    __m128i max2 = _mm_max_epu8( max0, max1 );
+
+    __m128i min3 = _mm_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i max3 = _mm_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i min4 = _mm_min_epu8( min2, min3 );
+    __m128i max4 = _mm_max_epu8( max2, max3 );
+
+    __m128i min5 = _mm_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i max5 = _mm_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i rmin = _mm_min_epu8( min4, min5 );
+    __m128i rmax = _mm_max_epu8( max4, max5 );
+
+    __m128i range1 = _mm_subs_epu8( rmax, rmin );
+    __m128i range2 = _mm_sad_epu8( rmax, rmin );
+
+    uint32_t vrange = _mm_cvtsi128_si32( range2 ) >> 1;
+    __m128i range = _mm_set1_epi16( DivTable[vrange] );
+
+    __m128i inset1 = _mm_srli_epi16( range1, 4 );
+    __m128i inset = _mm_and_si128( inset1, _mm_set1_epi8( 0xF ) );
+    __m128i min = _mm_adds_epu8( rmin, inset );
+    __m128i max = _mm_subs_epu8( rmax, inset );
+
+    __m128i c0 = _mm_subs_epu8( px0, rmin );
+    __m128i c1 = _mm_subs_epu8( px1, rmin );
+    __m128i c2 = _mm_subs_epu8( px2, rmin );
+    __m128i c3 = _mm_subs_epu8( px3, rmin );
+
+    __m128i is0 = _mm_maddubs_epi16( c0, _mm_set1_epi8( 1 ) );
+    __m128i is1 = _mm_maddubs_epi16( c1, _mm_set1_epi8( 1 ) );
+    __m128i is2 = _mm_maddubs_epi16( c2, _mm_set1_epi8( 1 ) );
+    __m128i is3 = _mm_maddubs_epi16( c3, _mm_set1_epi8( 1 ) );
+
+    __m128i s0 = _mm_hadd_epi16( is0, is1 );
+    __m128i s1 = _mm_hadd_epi16( is2, is3 );
+
+    __m128i m0 = _mm_mulhi_epu16( s0, range );
+    __m128i m1 = _mm_mulhi_epu16( s1, range );
+
+    __m128i p0 = _mm_packus_epi16( m0, m1 );
+
+    __m128i p1 = _mm_or_si128( _mm_srai_epi32( p0, 6 ), _mm_srai_epi32( p0, 12 ) );
+    __m128i p2 = _mm_or_si128( _mm_srai_epi32( p0, 18 ), p0 );
+    __m128i p3 = _mm_or_si128( p1, p2 );
+    __m128i p =_mm_shuffle_epi8( p3, _mm_set1_epi32( 0x0C080400 ) );
+
+    uint32_t vmin = _mm_cvtsi128_si32( min );
+    uint32_t vmax = _mm_cvtsi128_si32( max );
+    uint32_t vp = _mm_cvtsi128_si32( p );
+
+    return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
+}
+
+static etcpak_force_inline uint64_t ProcessAlpha_SSE( __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
+{
+    __m128i mask = _mm_setr_epi32( 0x0f0b0703, -1, -1, -1 );
+
+    __m128i m0 = _mm_shuffle_epi8( px0, mask );
+    __m128i m1 = _mm_shuffle_epi8( px1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
+    __m128i m2 = _mm_shuffle_epi8( px2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
+    __m128i m3 = _mm_shuffle_epi8( px3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
+    __m128i m4 = _mm_or_si128( m0, m1 );
+    __m128i m5 = _mm_or_si128( m2, m3 );
+    __m128i a = _mm_or_si128( m4, m5 );
+
+    __m128i solidCmp = _mm_shuffle_epi8( a, _mm_setzero_si128() );
+    __m128i cmpRes = _mm_cmpeq_epi8( a, solidCmp );
+    if( _mm_testc_si128( cmpRes, _mm_set1_epi32( -1 ) ) )
+    {
+        return _mm_cvtsi128_si32( a ) & 0xFF;
+    }
+
+    __m128i a1 = _mm_shuffle_epi32( a, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i max1 = _mm_max_epu8( a, a1 );
+    __m128i min1 = _mm_min_epu8( a, a1 );
+    __m128i amax2 = _mm_shuffle_epi32( max1, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i amin2 = _mm_shuffle_epi32( min1, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i max2 = _mm_max_epu8( max1, amax2 );
+    __m128i min2 = _mm_min_epu8( min1, amin2 );
+    __m128i amax3 = _mm_alignr_epi8( max2, max2, 2 );
+    __m128i amin3 = _mm_alignr_epi8( min2, min2, 2 );
+    __m128i max3 = _mm_max_epu8( max2, amax3 );
+    __m128i min3 = _mm_min_epu8( min2, amin3 );
+    __m128i amax4 = _mm_alignr_epi8( max3, max3, 1 );
+    __m128i amin4 = _mm_alignr_epi8( min3, min3, 1 );
+    __m128i max = _mm_max_epu8( max3, amax4 );
+    __m128i min = _mm_min_epu8( min3, amin4 );
+    __m128i minmax = _mm_unpacklo_epi8( max, min );
+
+    __m128i r = _mm_sub_epi8( max, min );
+    int range = _mm_cvtsi128_si32( r ) & 0xFF;
+    __m128i rv = _mm_set1_epi16( DivTableAlpha[range] );
+
+    __m128i v = _mm_sub_epi8( a, min );
+
+    __m128i lo16 = _mm_unpacklo_epi8( v, _mm_setzero_si128() );
+    __m128i hi16 = _mm_unpackhi_epi8( v, _mm_setzero_si128() );
+
+    __m128i lomul = _mm_mulhi_epu16( lo16, rv );
+    __m128i himul = _mm_mulhi_epu16( hi16, rv );
+
+    __m128i p0 = _mm_packus_epi16( lomul, himul );
+    __m128i p1 = _mm_or_si128( _mm_and_si128( p0, _mm_set1_epi16( 0x3F ) ), _mm_srai_epi16( _mm_and_si128( p0, _mm_set1_epi16( 0x3F00 ) ), 5 ) );
+    __m128i p2 = _mm_packus_epi16( p1, p1 );
+
+    uint64_t pi = _mm_cvtsi128_si64( p2 );
+    uint64_t data = 0;
+    for( int i=0; i<8; i++ )
+    {
+        uint64_t idx = AlphaIndexTable_SSE[(pi>>(i*8)) & 0x3F];
+        data |= idx << (i*6);
+    }
+    return (uint64_t)(uint16_t)_mm_cvtsi128_si32( minmax ) | ( data << 16 );
+}
+#endif
+
+void CompressDxt1( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+#ifdef __AVX2__
+    if( width%8 == 0 )
+    {
+        blocks /= 2;
+        uint32_t buf[8*4];
+        int i = 0;
+        char* dst8 = (char*)dst;
+
+        do
+        {
+            auto tmp = (char*)buf;
+            memcpy( tmp,        src + width * 0, 8*4 );
+            memcpy( tmp + 8*4,  src + width * 1, 8*4 );
+            memcpy( tmp + 16*4, src + width * 2, 8*4 );
+            memcpy( tmp + 24*4, src + width * 3, 8*4 );
+            src += 8;
+            if( ++i == width/8 )
+            {
+                src += width * 3;
+                i = 0;
+            }
+
+            ProcessRGB_AVX( (uint8_t*)buf, dst8 );
+        }
+        while( --blocks );
+    }
+    else
+#endif
+    {
+        uint32_t buf[4*4];
+        int i = 0;
+
+        auto ptr = dst;
+        do
+        {
+            auto tmp = (char*)buf;
+            memcpy( tmp,        src + width * 0, 4*4 );
+            memcpy( tmp + 4*4,  src + width * 1, 4*4 );
+            memcpy( tmp + 8*4,  src + width * 2, 4*4 );
+            memcpy( tmp + 12*4, src + width * 3, 4*4 );
+            src += 4;
+            if( ++i == width/4 )
+            {
+                src += width * 3;
+                i = 0;
+            }
+
+            const auto c = ProcessRGB( (uint8_t*)buf );
+            uint8_t fix[8];
+            memcpy( fix, &c, 8 );
+            for( int j=4; j<8; j++ ) fix[j] = DxtcIndexTable[fix[j]];
+            memcpy( ptr, fix, sizeof( uint64_t ) );
+            ptr++;
+        }
+        while( --blocks );
+    }
+}
+
+void CompressDxt1Dither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    uint32_t buf[4*4];
+    int i = 0;
+
+    auto ptr = dst;
+    do
+    {
+        auto tmp = (char*)buf;
+        memcpy( tmp,        src + width * 0, 4*4 );
+        memcpy( tmp + 4*4,  src + width * 1, 4*4 );
+        memcpy( tmp + 8*4,  src + width * 2, 4*4 );
+        memcpy( tmp + 12*4, src + width * 3, 4*4 );
+        src += 4;
+        if( ++i == width/4 )
+        {
+            src += width * 3;
+            i = 0;
+        }
+
+        Dither( (uint8_t*)buf );
+
+        const auto c = ProcessRGB( (uint8_t*)buf );
+        uint8_t fix[8];
+        memcpy( fix, &c, 8 );
+        for( int j=4; j<8; j++ ) fix[j] = DxtcIndexTable[fix[j]];
+        memcpy( ptr, fix, sizeof( uint64_t ) );
+        ptr++;
+    }
+    while( --blocks );
+}
+
+void CompressDxt5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    int i = 0;
+    auto ptr = dst;
+    do
+    {
+#ifdef __SSE4_1__
+        __m128i px0 = _mm_loadu_si128( (__m128i*)( src + width * 0 ) );
+        __m128i px1 = _mm_loadu_si128( (__m128i*)( src + width * 1 ) );
+        __m128i px2 = _mm_loadu_si128( (__m128i*)( src + width * 2 ) );
+        __m128i px3 = _mm_loadu_si128( (__m128i*)( src + width * 3 ) );
+
+        src += 4;
+        if( ++i == width/4 )
+        {
+            src += width * 3;
+            i = 0;
+        }
+
+        *ptr++ = ProcessAlpha_SSE( px0, px1, px2, px3 );
+
+        const auto c = ProcessRGB_SSE( px0, px1, px2, px3 );
+        uint8_t fix[8];
+        memcpy( fix, &c, 8 );
+        for( int j=4; j<8; j++ ) fix[j] = DxtcIndexTable[fix[j]];
+        memcpy( ptr, fix, sizeof( uint64_t ) );
+        ptr++;
+#else
+        uint32_t rgba[4*4];
+        uint8_t alpha[4*4];
+
+        auto tmp = (char*)rgba;
+        memcpy( tmp,        src + width * 0, 4*4 );
+        memcpy( tmp + 4*4,  src + width * 1, 4*4 );
+        memcpy( tmp + 8*4,  src + width * 2, 4*4 );
+        memcpy( tmp + 12*4, src + width * 3, 4*4 );
+        src += 4;
+        if( ++i == width/4 )
+        {
+            src += width * 3;
+            i = 0;
+        }
+
+        for( int i=0; i<16; i++ )
+        {
+            alpha[i] = rgba[i] >> 24;
+            rgba[i] &= 0xFFFFFF;
+        }
+        *ptr++ = ProcessAlpha( alpha );
+
+        const auto c = ProcessRGB( (uint8_t*)rgba );
+        uint8_t fix[8];
+        memcpy( fix, &c, 8 );
+        for( int j=4; j<8; j++ ) fix[j] = DxtcIndexTable[fix[j]];
+        memcpy( ptr, fix, sizeof( uint64_t ) );
+        ptr++;
+#endif
+    }
+    while( --blocks );
+}
diff --git a/thirdparty/etcpak/ProcessDxtc.hpp b/thirdparty/etcpak/ProcessDxtc.hpp
new file mode 100644
index 0000000000..8e0b12e4bd
--- /dev/null
+++ b/thirdparty/etcpak/ProcessDxtc.hpp
@@ -0,0 +1,11 @@
+#ifndef __PROCESSDXT1_HPP__
+#define __PROCESSDXT1_HPP__
+
+#include <stddef.h>
+#include <stdint.h>
+
+void CompressDxt1( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+void CompressDxt1Dither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+void CompressDxt5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+
+#endif
diff --git a/thirdparty/etcpak/ProcessRGB.cpp b/thirdparty/etcpak/ProcessRGB.cpp
new file mode 100644
index 0000000000..7f4524d105
--- /dev/null
+++ b/thirdparty/etcpak/ProcessRGB.cpp
@@ -0,0 +1,3100 @@
+#include <array>
+#include <string.h>
+#include <limits>
+
+#ifdef __ARM_NEON
+#  include <arm_neon.h>
+#endif
+
+#include "Dither.hpp"
+#include "ForceInline.hpp"
+#include "Math.hpp"
+#include "ProcessCommon.hpp"
+#include "ProcessRGB.hpp"
+#include "Tables.hpp"
+#include "Vector.hpp"
+#if defined __SSE4_1__ || defined __AVX2__ || defined _MSC_VER
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#    include <Windows.h>
+#    define _bswap(x) _byteswap_ulong(x)
+#    define _bswap64(x) _byteswap_uint64(x)
+#  else
+#    include <x86intrin.h>
+#  endif
+#endif
+
+#ifndef _bswap
+#  define _bswap(x) __builtin_bswap32(x)
+#  define _bswap64(x) __builtin_bswap64(x)
+#endif
+
+namespace
+{
+
+#if defined _MSC_VER && !defined __clang__
+static etcpak_force_inline unsigned long _bit_scan_forward( unsigned long mask )
+{
+    unsigned long ret;
+    _BitScanForward( &ret, mask );
+    return ret;
+}
+#endif
+
+typedef std::array<uint16_t, 4> v4i;
+
+#ifdef __AVX2__
+static etcpak_force_inline __m256i Sum4_AVX2( const uint8_t* data) noexcept
+{
+    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
+    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
+
+    __m128i dm0 = _mm_and_si128(d0, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm1 = _mm_and_si128(d1, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm2 = _mm_and_si128(d2, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm3 = _mm_and_si128(d3, _mm_set1_epi32(0x00FFFFFF));
+
+    __m256i t0 = _mm256_cvtepu8_epi16(dm0);
+    __m256i t1 = _mm256_cvtepu8_epi16(dm1);
+    __m256i t2 = _mm256_cvtepu8_epi16(dm2);
+    __m256i t3 = _mm256_cvtepu8_epi16(dm3);
+
+    __m256i sum0 = _mm256_add_epi16(t0, t1);
+    __m256i sum1 = _mm256_add_epi16(t2, t3);
+
+    __m256i s0 = _mm256_permute2x128_si256(sum0, sum1, (0) | (3 << 4)); // 0, 0, 3, 3
+    __m256i s1 = _mm256_permute2x128_si256(sum0, sum1, (1) | (2 << 4)); // 1, 1, 2, 2
+
+    __m256i s2 = _mm256_permute4x64_epi64(s0, _MM_SHUFFLE(1, 3, 0, 2));
+    __m256i s3 = _mm256_permute4x64_epi64(s0, _MM_SHUFFLE(0, 2, 1, 3));
+    __m256i s4 = _mm256_permute4x64_epi64(s1, _MM_SHUFFLE(3, 1, 0, 2));
+    __m256i s5 = _mm256_permute4x64_epi64(s1, _MM_SHUFFLE(2, 0, 1, 3));
+
+    __m256i sum5 = _mm256_add_epi16(s2, s3); //   3,   0,   3,   0
+    __m256i sum6 = _mm256_add_epi16(s4, s5); //   2,   1,   1,   2
+    return _mm256_add_epi16(sum5, sum6);     // 3+2, 0+1, 3+1, 3+2
+}
+
+static etcpak_force_inline __m256i Average_AVX2( const __m256i data) noexcept
+{
+    __m256i a = _mm256_add_epi16(data, _mm256_set1_epi16(4));
+
+    return _mm256_srli_epi16(a, 3);
+}
+
+static etcpak_force_inline __m128i CalcErrorBlock_AVX2( const __m256i data, const v4i a[8]) noexcept
+{
+    //
+    __m256i a0 = _mm256_load_si256((__m256i*)a[0].data());
+    __m256i a1 = _mm256_load_si256((__m256i*)a[4].data());
+
+    // err = 8 * ( sq( average[0] ) + sq( average[1] ) + sq( average[2] ) );
+    __m256i a4 = _mm256_madd_epi16(a0, a0);
+    __m256i a5 = _mm256_madd_epi16(a1, a1);
+
+    __m256i a6 = _mm256_hadd_epi32(a4, a5);
+    __m256i a7 = _mm256_slli_epi32(a6, 3);
+
+    __m256i a8 = _mm256_add_epi32(a7, _mm256_set1_epi32(0x3FFFFFFF)); // Big value to prevent negative values, but small enough to prevent overflow
+
+    // average is not swapped
+    // err -= block[0] * 2 * average[0];
+    // err -= block[1] * 2 * average[1];
+    // err -= block[2] * 2 * average[2];
+    __m256i a2 = _mm256_slli_epi16(a0, 1);
+    __m256i a3 = _mm256_slli_epi16(a1, 1);
+    __m256i b0 = _mm256_madd_epi16(a2, data);
+    __m256i b1 = _mm256_madd_epi16(a3, data);
+
+    __m256i b2 = _mm256_hadd_epi32(b0, b1);
+    __m256i b3 = _mm256_sub_epi32(a8, b2);
+    __m256i b4 = _mm256_hadd_epi32(b3, b3);
+
+    __m256i b5 = _mm256_permutevar8x32_epi32(b4, _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0));
+
+    return _mm256_castsi256_si128(b5);
+}
+
+static etcpak_force_inline void ProcessAverages_AVX2(const __m256i d, v4i a[8] ) noexcept
+{
+    __m256i t = _mm256_add_epi16(_mm256_mullo_epi16(d, _mm256_set1_epi16(31)), _mm256_set1_epi16(128));
+
+    __m256i c = _mm256_srli_epi16(_mm256_add_epi16(t, _mm256_srli_epi16(t, 8)), 8);
+
+    __m256i c1 = _mm256_shuffle_epi32(c, _MM_SHUFFLE(3, 2, 3, 2));
+    __m256i diff = _mm256_sub_epi16(c, c1);
+    diff = _mm256_max_epi16(diff, _mm256_set1_epi16(-4));
+    diff = _mm256_min_epi16(diff, _mm256_set1_epi16(3));
+
+    __m256i co = _mm256_add_epi16(c1, diff);
+
+    c = _mm256_blend_epi16(co, c, 0xF0);
+
+    __m256i a0 = _mm256_or_si256(_mm256_slli_epi16(c, 3), _mm256_srli_epi16(c, 2));
+
+    _mm256_store_si256((__m256i*)a[4].data(), a0);
+
+    __m256i t0 = _mm256_add_epi16(_mm256_mullo_epi16(d, _mm256_set1_epi16(15)), _mm256_set1_epi16(128));
+    __m256i t1 = _mm256_srli_epi16(_mm256_add_epi16(t0, _mm256_srli_epi16(t0, 8)), 8);
+
+    __m256i t2 = _mm256_or_si256(t1, _mm256_slli_epi16(t1, 4));
+
+    _mm256_store_si256((__m256i*)a[0].data(), t2);
+}
+
+static etcpak_force_inline uint64_t EncodeAverages_AVX2( const v4i a[8], size_t idx ) noexcept
+{
+    uint64_t d = ( idx << 24 );
+    size_t base = idx << 1;
+
+    __m128i a0 = _mm_load_si128((const __m128i*)a[base].data());
+
+    __m128i r0, r1;
+
+    if( ( idx & 0x2 ) == 0 )
+    {
+        r0 = _mm_srli_epi16(a0, 4);
+
+        __m128i a1 = _mm_unpackhi_epi64(r0, r0);
+        r1 = _mm_slli_epi16(a1, 4);
+    }
+    else
+    {
+        __m128i a1 = _mm_and_si128(a0, _mm_set1_epi16(-8));
+
+        r0 = _mm_unpackhi_epi64(a1, a1);
+        __m128i a2 = _mm_sub_epi16(a1, r0);
+        __m128i a3 = _mm_srai_epi16(a2, 3);
+        r1 = _mm_and_si128(a3, _mm_set1_epi16(0x07));
+    }
+
+    __m128i r2 = _mm_or_si128(r0, r1);
+    // do missing swap for average values
+    __m128i r3 = _mm_shufflelo_epi16(r2, _MM_SHUFFLE(3, 0, 1, 2));
+    __m128i r4 = _mm_packus_epi16(r3, _mm_setzero_si128());
+    d |= _mm_cvtsi128_si32(r4);
+
+    return d;
+}
+
+static etcpak_force_inline uint64_t CheckSolid_AVX2( const uint8_t* src ) noexcept
+{
+    __m256i d0 = _mm256_loadu_si256(((__m256i*)src) + 0);
+    __m256i d1 = _mm256_loadu_si256(((__m256i*)src) + 1);
+
+    __m256i c = _mm256_broadcastd_epi32(_mm256_castsi256_si128(d0));
+
+    __m256i c0 = _mm256_cmpeq_epi8(d0, c);
+    __m256i c1 = _mm256_cmpeq_epi8(d1, c);
+
+    __m256i m = _mm256_and_si256(c0, c1);
+
+    if (!_mm256_testc_si256(m, _mm256_set1_epi32(-1)))
+    {
+        return 0;
+    }
+
+    return 0x02000000 |
+        ( (unsigned int)( src[0] & 0xF8 ) << 16 ) |
+        ( (unsigned int)( src[1] & 0xF8 ) << 8 ) |
+        ( (unsigned int)( src[2] & 0xF8 ) );
+}
+
+static etcpak_force_inline __m128i PrepareAverages_AVX2( v4i a[8], const uint8_t* src) noexcept
+{
+    __m256i sum4 = Sum4_AVX2( src );
+
+    ProcessAverages_AVX2(Average_AVX2( sum4 ), a );
+
+    return CalcErrorBlock_AVX2( sum4, a);
+}
+
+static etcpak_force_inline __m128i PrepareAverages_AVX2( v4i a[8], const __m256i sum4) noexcept
+{
+    ProcessAverages_AVX2(Average_AVX2( sum4 ), a );
+
+    return CalcErrorBlock_AVX2( sum4, a);
+}
+
+static etcpak_force_inline void FindBestFit_4x2_AVX2( uint32_t terr[2][8], uint32_t tsel[8], v4i a[8], const uint32_t offset, const uint8_t* data) noexcept
+{
+    __m256i sel0 = _mm256_setzero_si256();
+    __m256i sel1 = _mm256_setzero_si256();
+
+    for (unsigned int j = 0; j < 2; ++j)
+    {
+        unsigned int bid = offset + 1 - j;
+
+        __m256i squareErrorSum = _mm256_setzero_si256();
+
+        __m128i a0 = _mm_loadl_epi64((const __m128i*)a[bid].data());
+        __m256i a1 = _mm256_broadcastq_epi64(a0);
+
+        // Processing one full row each iteration
+        for (size_t i = 0; i < 8; i += 4)
+        {
+            __m128i rgb = _mm_loadu_si128((const __m128i*)(data + i * 4));
+
+            __m256i rgb16 = _mm256_cvtepu8_epi16(rgb);
+            __m256i d = _mm256_sub_epi16(a1, rgb16);
+
+            // The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
+            // This produces slightly different results, but is significant faster
+            __m256i pixel0 = _mm256_madd_epi16(d, _mm256_set_epi16(0, 38, 76, 14, 0, 38, 76, 14, 0, 38, 76, 14, 0, 38, 76, 14));
+            __m256i pixel1 = _mm256_packs_epi32(pixel0, pixel0);
+            __m256i pixel2 = _mm256_hadd_epi16(pixel1, pixel1);
+            __m128i pixel3 = _mm256_castsi256_si128(pixel2);
+
+            __m128i pix0 = _mm_broadcastw_epi16(pixel3);
+            __m128i pix1 = _mm_broadcastw_epi16(_mm_srli_epi32(pixel3, 16));
+            __m256i pixel = _mm256_insertf128_si256(_mm256_castsi128_si256(pix0), pix1, 1);
+
+            // Processing first two pixels of the row
+            {
+                __m256i pix = _mm256_abs_epi16(pixel);
+
+                // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+                // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+                __m256i error0 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[0])));
+                __m256i error1 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[1])));
+
+                __m256i minIndex0 = _mm256_and_si256(_mm256_cmpgt_epi16(error0, error1), _mm256_set1_epi16(1));
+                __m256i minError = _mm256_min_epi16(error0, error1);
+
+                // Exploiting symmetry of the selector table and use the sign bit
+                // This produces slightly different results, but is significant faster
+                __m256i minIndex1 = _mm256_srli_epi16(pixel, 15);
+
+                // Interleaving values so madd instruction can be used
+                __m256i minErrorLo = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(1, 1, 0, 0));
+                __m256i minErrorHi = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(3, 3, 2, 2));
+
+                __m256i minError2 = _mm256_unpacklo_epi16(minErrorLo, minErrorHi);
+                // Squaring the minimum error to produce correct values when adding
+                __m256i squareError = _mm256_madd_epi16(minError2, minError2);
+
+                squareErrorSum = _mm256_add_epi32(squareErrorSum, squareError);
+
+                // Packing selector bits
+                __m256i minIndexLo2 = _mm256_sll_epi16(minIndex0, _mm_cvtsi64_si128(i + j * 8));
+                __m256i minIndexHi2 = _mm256_sll_epi16(minIndex1, _mm_cvtsi64_si128(i + j * 8));
+
+                sel0 = _mm256_or_si256(sel0, minIndexLo2);
+                sel1 = _mm256_or_si256(sel1, minIndexHi2);
+            }
+
+            pixel3 = _mm256_extracti128_si256(pixel2, 1);
+            pix0 = _mm_broadcastw_epi16(pixel3);
+            pix1 = _mm_broadcastw_epi16(_mm_srli_epi32(pixel3, 16));
+            pixel = _mm256_insertf128_si256(_mm256_castsi128_si256(pix0), pix1, 1);
+
+            // Processing second two pixels of the row
+            {
+                __m256i pix = _mm256_abs_epi16(pixel);
+
+                // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+                // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+                __m256i error0 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[0])));
+                __m256i error1 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[1])));
+
+                __m256i minIndex0 = _mm256_and_si256(_mm256_cmpgt_epi16(error0, error1), _mm256_set1_epi16(1));
+                __m256i minError = _mm256_min_epi16(error0, error1);
+
+                // Exploiting symmetry of the selector table and use the sign bit
+                __m256i minIndex1 = _mm256_srli_epi16(pixel, 15);
+
+                // Interleaving values so madd instruction can be used
+                __m256i minErrorLo = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(1, 1, 0, 0));
+                __m256i minErrorHi = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(3, 3, 2, 2));
+
+                __m256i minError2 = _mm256_unpacklo_epi16(minErrorLo, minErrorHi);
+                // Squaring the minimum error to produce correct values when adding
+                __m256i squareError = _mm256_madd_epi16(minError2, minError2);
+
+                squareErrorSum = _mm256_add_epi32(squareErrorSum, squareError);
+
+                // Packing selector bits
+                __m256i minIndexLo2 = _mm256_sll_epi16(minIndex0, _mm_cvtsi64_si128(i + j * 8));
+                __m256i minIndexHi2 = _mm256_sll_epi16(minIndex1, _mm_cvtsi64_si128(i + j * 8));
+                __m256i minIndexLo3 = _mm256_slli_epi16(minIndexLo2, 2);
+                __m256i minIndexHi3 = _mm256_slli_epi16(minIndexHi2, 2);
+
+                sel0 = _mm256_or_si256(sel0, minIndexLo3);
+                sel1 = _mm256_or_si256(sel1, minIndexHi3);
+            }
+        }
+
+        data += 8 * 4;
+
+        _mm256_store_si256((__m256i*)terr[1 - j], squareErrorSum);
+    }
+
+    // Interleave selector bits
+    __m256i minIndexLo0 = _mm256_unpacklo_epi16(sel0, sel1);
+    __m256i minIndexHi0 = _mm256_unpackhi_epi16(sel0, sel1);
+
+    __m256i minIndexLo1 = _mm256_permute2x128_si256(minIndexLo0, minIndexHi0, (0) | (2 << 4));
+    __m256i minIndexHi1 = _mm256_permute2x128_si256(minIndexLo0, minIndexHi0, (1) | (3 << 4));
+
+    __m256i minIndexHi2 = _mm256_slli_epi32(minIndexHi1, 1);
+
+    __m256i sel = _mm256_or_si256(minIndexLo1, minIndexHi2);
+
+    _mm256_store_si256((__m256i*)tsel, sel);
+}
+
+static etcpak_force_inline void FindBestFit_2x4_AVX2( uint32_t terr[2][8], uint32_t tsel[8], v4i a[8], const uint32_t offset, const uint8_t* data) noexcept
+{
+    __m256i sel0 = _mm256_setzero_si256();
+    __m256i sel1 = _mm256_setzero_si256();
+
+    __m256i squareErrorSum0 = _mm256_setzero_si256();
+    __m256i squareErrorSum1 = _mm256_setzero_si256();
+
+    __m128i a0 = _mm_loadl_epi64((const __m128i*)a[offset + 1].data());
+    __m128i a1 = _mm_loadl_epi64((const __m128i*)a[offset + 0].data());
+
+    __m128i a2 = _mm_broadcastq_epi64(a0);
+    __m128i a3 = _mm_broadcastq_epi64(a1);
+    __m256i a4 = _mm256_insertf128_si256(_mm256_castsi128_si256(a2), a3, 1);
+
+    // Processing one full row each iteration
+    for (size_t i = 0; i < 16; i += 4)
+    {
+        __m128i rgb = _mm_loadu_si128((const __m128i*)(data + i * 4));
+
+        __m256i rgb16 = _mm256_cvtepu8_epi16(rgb);
+        __m256i d = _mm256_sub_epi16(a4, rgb16);
+
+        // The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
+        // This produces slightly different results, but is significant faster
+        __m256i pixel0 = _mm256_madd_epi16(d, _mm256_set_epi16(0, 38, 76, 14, 0, 38, 76, 14, 0, 38, 76, 14, 0, 38, 76, 14));
+        __m256i pixel1 = _mm256_packs_epi32(pixel0, pixel0);
+        __m256i pixel2 = _mm256_hadd_epi16(pixel1, pixel1);
+        __m128i pixel3 = _mm256_castsi256_si128(pixel2);
+
+        __m128i pix0 = _mm_broadcastw_epi16(pixel3);
+        __m128i pix1 = _mm_broadcastw_epi16(_mm_srli_epi32(pixel3, 16));
+        __m256i pixel = _mm256_insertf128_si256(_mm256_castsi128_si256(pix0), pix1, 1);
+
+        // Processing first two pixels of the row
+        {
+            __m256i pix = _mm256_abs_epi16(pixel);
+
+            // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+            // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+            __m256i error0 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[0])));
+            __m256i error1 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[1])));
+
+            __m256i minIndex0 = _mm256_and_si256(_mm256_cmpgt_epi16(error0, error1), _mm256_set1_epi16(1));
+            __m256i minError = _mm256_min_epi16(error0, error1);
+
+            // Exploiting symmetry of the selector table and use the sign bit
+            __m256i minIndex1 = _mm256_srli_epi16(pixel, 15);
+
+            // Interleaving values so madd instruction can be used
+            __m256i minErrorLo = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(1, 1, 0, 0));
+            __m256i minErrorHi = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(3, 3, 2, 2));
+
+            __m256i minError2 = _mm256_unpacklo_epi16(minErrorLo, minErrorHi);
+            // Squaring the minimum error to produce correct values when adding
+            __m256i squareError = _mm256_madd_epi16(minError2, minError2);
+
+            squareErrorSum0 = _mm256_add_epi32(squareErrorSum0, squareError);
+
+            // Packing selector bits
+            __m256i minIndexLo2 = _mm256_sll_epi16(minIndex0, _mm_cvtsi64_si128(i));
+            __m256i minIndexHi2 = _mm256_sll_epi16(minIndex1, _mm_cvtsi64_si128(i));
+
+            sel0 = _mm256_or_si256(sel0, minIndexLo2);
+            sel1 = _mm256_or_si256(sel1, minIndexHi2);
+        }
+
+        pixel3 = _mm256_extracti128_si256(pixel2, 1);
+        pix0 = _mm_broadcastw_epi16(pixel3);
+        pix1 = _mm_broadcastw_epi16(_mm_srli_epi32(pixel3, 16));
+        pixel = _mm256_insertf128_si256(_mm256_castsi128_si256(pix0), pix1, 1);
+
+        // Processing second two pixels of the row
+        {
+            __m256i pix = _mm256_abs_epi16(pixel);
+
+            // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+            // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+            __m256i error0 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[0])));
+            __m256i error1 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[1])));
+
+            __m256i minIndex0 = _mm256_and_si256(_mm256_cmpgt_epi16(error0, error1), _mm256_set1_epi16(1));
+            __m256i minError = _mm256_min_epi16(error0, error1);
+
+            // Exploiting symmetry of the selector table and use the sign bit
+            __m256i minIndex1 = _mm256_srli_epi16(pixel, 15);
+
+            // Interleaving values so madd instruction can be used
+            __m256i minErrorLo = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(1, 1, 0, 0));
+            __m256i minErrorHi = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(3, 3, 2, 2));
+
+            __m256i minError2 = _mm256_unpacklo_epi16(minErrorLo, minErrorHi);
+            // Squaring the minimum error to produce correct values when adding
+            __m256i squareError = _mm256_madd_epi16(minError2, minError2);
+
+            squareErrorSum1 = _mm256_add_epi32(squareErrorSum1, squareError);
+
+            // Packing selector bits
+            __m256i minIndexLo2 = _mm256_sll_epi16(minIndex0, _mm_cvtsi64_si128(i));
+            __m256i minIndexHi2 = _mm256_sll_epi16(minIndex1, _mm_cvtsi64_si128(i));
+            __m256i minIndexLo3 = _mm256_slli_epi16(minIndexLo2, 2);
+            __m256i minIndexHi3 = _mm256_slli_epi16(minIndexHi2, 2);
+
+            sel0 = _mm256_or_si256(sel0, minIndexLo3);
+            sel1 = _mm256_or_si256(sel1, minIndexHi3);
+        }
+    }
+
+    _mm256_store_si256((__m256i*)terr[1], squareErrorSum0);
+    _mm256_store_si256((__m256i*)terr[0], squareErrorSum1);
+
+    // Interleave selector bits
+    __m256i minIndexLo0 = _mm256_unpacklo_epi16(sel0, sel1);
+    __m256i minIndexHi0 = _mm256_unpackhi_epi16(sel0, sel1);
+
+    __m256i minIndexLo1 = _mm256_permute2x128_si256(minIndexLo0, minIndexHi0, (0) | (2 << 4));
+    __m256i minIndexHi1 = _mm256_permute2x128_si256(minIndexLo0, minIndexHi0, (1) | (3 << 4));
+
+    __m256i minIndexHi2 = _mm256_slli_epi32(minIndexHi1, 1);
+
+    __m256i sel = _mm256_or_si256(minIndexLo1, minIndexHi2);
+
+    _mm256_store_si256((__m256i*)tsel, sel);
+}
+
+static etcpak_force_inline uint64_t EncodeSelectors_AVX2( uint64_t d, const uint32_t terr[2][8], const uint32_t tsel[8], const bool rotate) noexcept
+{
+    size_t tidx[2];
+
+    // Get index of minimum error (terr[0] and terr[1])
+    __m256i err0 = _mm256_load_si256((const __m256i*)terr[0]);
+    __m256i err1 = _mm256_load_si256((const __m256i*)terr[1]);
+
+    __m256i errLo = _mm256_permute2x128_si256(err0, err1, (0) | (2 << 4));
+    __m256i errHi = _mm256_permute2x128_si256(err0, err1, (1) | (3 << 4));
+
+    __m256i errMin0 = _mm256_min_epu32(errLo, errHi);
+
+    __m256i errMin1 = _mm256_shuffle_epi32(errMin0, _MM_SHUFFLE(2, 3, 0, 1));
+    __m256i errMin2 = _mm256_min_epu32(errMin0, errMin1);
+
+    __m256i errMin3 = _mm256_shuffle_epi32(errMin2, _MM_SHUFFLE(1, 0, 3, 2));
+    __m256i errMin4 = _mm256_min_epu32(errMin3, errMin2);
+
+    __m256i errMin5 = _mm256_permute2x128_si256(errMin4, errMin4, (0) | (0 << 4));
+    __m256i errMin6 = _mm256_permute2x128_si256(errMin4, errMin4, (1) | (1 << 4));
+
+    __m256i errMask0 = _mm256_cmpeq_epi32(errMin5, err0);
+    __m256i errMask1 = _mm256_cmpeq_epi32(errMin6, err1);
+
+    uint32_t mask0 = _mm256_movemask_epi8(errMask0);
+    uint32_t mask1 = _mm256_movemask_epi8(errMask1);
+
+    tidx[0] = _bit_scan_forward(mask0) >> 2;
+    tidx[1] = _bit_scan_forward(mask1) >> 2;
+
+    d |= tidx[0] << 26;
+    d |= tidx[1] << 29;
+
+    unsigned int t0 = tsel[tidx[0]];
+    unsigned int t1 = tsel[tidx[1]];
+
+    if (!rotate)
+    {
+        t0 &= 0xFF00FF00;
+        t1 &= 0x00FF00FF;
+    }
+    else
+    {
+        t0 &= 0xCCCCCCCC;
+        t1 &= 0x33333333;
+    }
+
+    // Flip selectors from sign bit
+    unsigned int t2 = (t0 | t1) ^ 0xFFFF0000;
+
+    return d | static_cast<uint64_t>(_bswap(t2)) << 32;
+}
+
+static etcpak_force_inline __m128i r6g7b6_AVX2(__m128 cof, __m128 chf, __m128 cvf) noexcept
+{
+    __m128i co = _mm_cvttps_epi32(cof);
+    __m128i ch = _mm_cvttps_epi32(chf);
+    __m128i cv = _mm_cvttps_epi32(cvf);
+
+    __m128i coh = _mm_packus_epi32(co, ch);
+    __m128i cv0 = _mm_packus_epi32(cv, _mm_setzero_si128());
+
+    __m256i cohv0 = _mm256_inserti128_si256(_mm256_castsi128_si256(coh), cv0, 1);
+    __m256i cohv1 = _mm256_min_epu16(cohv0, _mm256_set1_epi16(1023));
+
+    __m256i cohv2 = _mm256_sub_epi16(cohv1, _mm256_set1_epi16(15));
+    __m256i cohv3 = _mm256_srai_epi16(cohv2, 1);
+
+    __m256i cohvrb0 = _mm256_add_epi16(cohv3, _mm256_set1_epi16(11));
+    __m256i cohvrb1 = _mm256_add_epi16(cohv3, _mm256_set1_epi16(4));
+    __m256i cohvg0 = _mm256_add_epi16(cohv3, _mm256_set1_epi16(9));
+    __m256i cohvg1 = _mm256_add_epi16(cohv3, _mm256_set1_epi16(6));
+
+    __m256i cohvrb2 = _mm256_srai_epi16(cohvrb0, 7);
+    __m256i cohvrb3 = _mm256_srai_epi16(cohvrb1, 7);
+    __m256i cohvg2 = _mm256_srai_epi16(cohvg0, 8);
+    __m256i cohvg3 = _mm256_srai_epi16(cohvg1, 8);
+
+    __m256i cohvrb4 = _mm256_sub_epi16(cohvrb0, cohvrb2);
+    __m256i cohvrb5 = _mm256_sub_epi16(cohvrb4, cohvrb3);
+    __m256i cohvg4 = _mm256_sub_epi16(cohvg0, cohvg2);
+    __m256i cohvg5 = _mm256_sub_epi16(cohvg4, cohvg3);
+
+    __m256i cohvrb6 = _mm256_srai_epi16(cohvrb5, 3);
+    __m256i cohvg6 = _mm256_srai_epi16(cohvg5, 2);
+
+    __m256i cohv4 = _mm256_blend_epi16(cohvg6, cohvrb6, 0x55);
+
+    __m128i cohv5 = _mm_packus_epi16(_mm256_castsi256_si128(cohv4), _mm256_extracti128_si256(cohv4, 1));
+    return _mm_shuffle_epi8(cohv5, _mm_setr_epi8(6, 5, 4, -1, 2, 1, 0, -1, 10, 9, 8, -1, -1, -1, -1, -1));
+}
+
+struct Plane
+{
+    uint64_t plane;
+    uint64_t error;
+    __m256i sum4;
+};
+
+static etcpak_force_inline Plane Planar_AVX2(const uint8_t* src)
+{
+    __m128i d0 = _mm_loadu_si128(((__m128i*)src) + 0);
+    __m128i d1 = _mm_loadu_si128(((__m128i*)src) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)src) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)src) + 3);
+
+    __m128i rgb0 = _mm_shuffle_epi8(d0, _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, -1, -1, -1, -1));
+    __m128i rgb1 = _mm_shuffle_epi8(d1, _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, -1, -1, -1, -1));
+    __m128i rgb2 = _mm_shuffle_epi8(d2, _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, -1, -1, -1, -1));
+    __m128i rgb3 = _mm_shuffle_epi8(d3, _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, -1, -1, -1, -1));
+
+    __m128i rg0 = _mm_unpacklo_epi32(rgb0, rgb1);
+    __m128i rg1 = _mm_unpacklo_epi32(rgb2, rgb3);
+    __m128i b0 = _mm_unpackhi_epi32(rgb0, rgb1);
+    __m128i b1 = _mm_unpackhi_epi32(rgb2, rgb3);
+
+    // swap channels
+    __m128i b8 = _mm_unpacklo_epi64(rg0, rg1);
+    __m128i g8 = _mm_unpackhi_epi64(rg0, rg1);
+    __m128i r8 = _mm_unpacklo_epi64(b0, b1);
+
+    __m128i t0 = _mm_sad_epu8(r8, _mm_setzero_si128());
+    __m128i t1 = _mm_sad_epu8(g8, _mm_setzero_si128());
+    __m128i t2 = _mm_sad_epu8(b8, _mm_setzero_si128());
+
+    __m128i r8s = _mm_shuffle_epi8(r8, _mm_set_epi8(0xF, 0xE, 0xB, 0xA, 0x7, 0x6, 0x3, 0x2, 0xD, 0xC, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0));
+    __m128i g8s = _mm_shuffle_epi8(g8, _mm_set_epi8(0xF, 0xE, 0xB, 0xA, 0x7, 0x6, 0x3, 0x2, 0xD, 0xC, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0));
+    __m128i b8s = _mm_shuffle_epi8(b8, _mm_set_epi8(0xF, 0xE, 0xB, 0xA, 0x7, 0x6, 0x3, 0x2, 0xD, 0xC, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0));
+
+    __m128i s0 = _mm_sad_epu8(r8s, _mm_setzero_si128());
+    __m128i s1 = _mm_sad_epu8(g8s, _mm_setzero_si128());
+    __m128i s2 = _mm_sad_epu8(b8s, _mm_setzero_si128());
+
+    __m256i sr0 = _mm256_insertf128_si256(_mm256_castsi128_si256(t0), s0, 1);
+    __m256i sg0 = _mm256_insertf128_si256(_mm256_castsi128_si256(t1), s1, 1);
+    __m256i sb0 = _mm256_insertf128_si256(_mm256_castsi128_si256(t2), s2, 1);
+
+    __m256i sr1 = _mm256_slli_epi64(sr0, 32);
+    __m256i sg1 = _mm256_slli_epi64(sg0, 16);
+
+    __m256i srb = _mm256_or_si256(sr1, sb0);
+    __m256i srgb = _mm256_or_si256(srb, sg1);
+
+    __m128i t3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(t0), _mm_castsi128_ps(t1), _MM_SHUFFLE(2, 0, 2, 0)));
+    __m128i t4 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3, 1, 2, 0));
+    __m128i t5 = _mm_hadd_epi32(t3, t4);
+    __m128i t6 = _mm_shuffle_epi32(t5, _MM_SHUFFLE(1, 1, 1, 1));
+    __m128i t7 = _mm_shuffle_epi32(t5, _MM_SHUFFLE(2, 2, 2, 2));
+
+    __m256i sr = _mm256_broadcastw_epi16(t5);
+    __m256i sg = _mm256_broadcastw_epi16(t6);
+    __m256i sb = _mm256_broadcastw_epi16(t7);
+
+    __m256i r08 = _mm256_cvtepu8_epi16(r8);
+    __m256i g08 = _mm256_cvtepu8_epi16(g8);
+    __m256i b08 = _mm256_cvtepu8_epi16(b8);
+
+    __m256i r16 = _mm256_slli_epi16(r08, 4);
+    __m256i g16 = _mm256_slli_epi16(g08, 4);
+    __m256i b16 = _mm256_slli_epi16(b08, 4);
+
+    __m256i difR0 = _mm256_sub_epi16(r16, sr);
+    __m256i difG0 = _mm256_sub_epi16(g16, sg);
+    __m256i difB0 = _mm256_sub_epi16(b16, sb);
+
+    __m256i difRyz = _mm256_madd_epi16(difR0, _mm256_set_epi16(255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255));
+    __m256i difGyz = _mm256_madd_epi16(difG0, _mm256_set_epi16(255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255));
+    __m256i difByz = _mm256_madd_epi16(difB0, _mm256_set_epi16(255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255));
+
+    __m256i difRxz = _mm256_madd_epi16(difR0, _mm256_set_epi16(255, 255, 255, 255, 85, 85, 85, 85, -85, -85, -85, -85, -255, -255, -255, -255));
+    __m256i difGxz = _mm256_madd_epi16(difG0, _mm256_set_epi16(255, 255, 255, 255, 85, 85, 85, 85, -85, -85, -85, -85, -255, -255, -255, -255));
+    __m256i difBxz = _mm256_madd_epi16(difB0, _mm256_set_epi16(255, 255, 255, 255, 85, 85, 85, 85, -85, -85, -85, -85, -255, -255, -255, -255));
+
+    __m256i difRGyz = _mm256_hadd_epi32(difRyz, difGyz);
+    __m256i difByzxz = _mm256_hadd_epi32(difByz, difBxz);
+
+    __m256i difRGxz = _mm256_hadd_epi32(difRxz, difGxz);
+
+    __m128i sumRGyz = _mm_add_epi32(_mm256_castsi256_si128(difRGyz), _mm256_extracti128_si256(difRGyz, 1));
+    __m128i sumByzxz = _mm_add_epi32(_mm256_castsi256_si128(difByzxz), _mm256_extracti128_si256(difByzxz, 1));
+    __m128i sumRGxz = _mm_add_epi32(_mm256_castsi256_si128(difRGxz), _mm256_extracti128_si256(difRGxz, 1));
+
+    __m128i sumRGByz = _mm_hadd_epi32(sumRGyz, sumByzxz);
+    __m128i sumRGByzxz = _mm_hadd_epi32(sumRGxz, sumByzxz);
+
+    __m128i sumRGBxz = _mm_shuffle_epi32(sumRGByzxz, _MM_SHUFFLE(2, 3, 1, 0));
+
+    __m128 sumRGByzf = _mm_cvtepi32_ps(sumRGByz);
+    __m128 sumRGBxzf = _mm_cvtepi32_ps(sumRGBxz);
+
+    const float value = (255 * 255 * 8.0f + 85 * 85 * 8.0f) * 16.0f;
+
+    __m128 scale = _mm_set1_ps(-4.0f / value);
+
+    __m128 af = _mm_mul_ps(sumRGBxzf, scale);
+    __m128 bf = _mm_mul_ps(sumRGByzf, scale);
+
+    __m128 df = _mm_mul_ps(_mm_cvtepi32_ps(t5), _mm_set1_ps(4.0f / 16.0f));
+
+    // calculating the three colors RGBO, RGBH, and RGBV.  RGB = df - af * x - bf * y;
+    __m128 cof0 = _mm_fnmadd_ps(af, _mm_set1_ps(-255.0f), _mm_fnmadd_ps(bf, _mm_set1_ps(-255.0f), df));
+    __m128 chf0 = _mm_fnmadd_ps(af, _mm_set1_ps( 425.0f), _mm_fnmadd_ps(bf, _mm_set1_ps(-255.0f), df));
+    __m128 cvf0 = _mm_fnmadd_ps(af, _mm_set1_ps(-255.0f), _mm_fnmadd_ps(bf, _mm_set1_ps( 425.0f), df));
+
+    // convert to r6g7b6
+    __m128i cohv = r6g7b6_AVX2(cof0, chf0, cvf0);
+
+    uint64_t rgbho = _mm_extract_epi64(cohv, 0);
+    uint32_t rgbv0 = _mm_extract_epi32(cohv, 2);
+
+    // Error calculation
+    auto ro0 = (rgbho >> 48) & 0x3F;
+    auto go0 = (rgbho >> 40) & 0x7F;
+    auto bo0 = (rgbho >> 32) & 0x3F;
+    auto ro1 = (ro0 >> 4) | (ro0 << 2);
+    auto go1 = (go0 >> 6) | (go0 << 1);
+    auto bo1 = (bo0 >> 4) | (bo0 << 2);
+    auto ro2 = (ro1 << 2) + 2;
+    auto go2 = (go1 << 2) + 2;
+    auto bo2 = (bo1 << 2) + 2;
+
+    __m256i ro3 = _mm256_set1_epi16(ro2);
+    __m256i go3 = _mm256_set1_epi16(go2);
+    __m256i bo3 = _mm256_set1_epi16(bo2);
+
+    auto rh0 = (rgbho >> 16) & 0x3F;
+    auto gh0 = (rgbho >>  8) & 0x7F;
+    auto bh0 = (rgbho >>  0) & 0x3F;
+    auto rh1 = (rh0 >> 4) | (rh0 << 2);
+    auto gh1 = (gh0 >> 6) | (gh0 << 1);
+    auto bh1 = (bh0 >> 4) | (bh0 << 2);
+
+    auto rh2 = rh1 - ro1;
+    auto gh2 = gh1 - go1;
+    auto bh2 = bh1 - bo1;
+
+    __m256i rh3 = _mm256_set1_epi16(rh2);
+    __m256i gh3 = _mm256_set1_epi16(gh2);
+    __m256i bh3 = _mm256_set1_epi16(bh2);
+
+    auto rv0 = (rgbv0 >> 16) & 0x3F;
+    auto gv0 = (rgbv0 >>  8) & 0x7F;
+    auto bv0 = (rgbv0 >>  0) & 0x3F;
+    auto rv1 = (rv0 >> 4) | (rv0 << 2);
+    auto gv1 = (gv0 >> 6) | (gv0 << 1);
+    auto bv1 = (bv0 >> 4) | (bv0 << 2);
+
+    auto rv2 = rv1 - ro1;
+    auto gv2 = gv1 - go1;
+    auto bv2 = bv1 - bo1;
+
+    __m256i rv3 = _mm256_set1_epi16(rv2);
+    __m256i gv3 = _mm256_set1_epi16(gv2);
+    __m256i bv3 = _mm256_set1_epi16(bv2);
+
+    __m256i x = _mm256_set_epi16(3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0);
+
+    __m256i rh4 = _mm256_mullo_epi16(rh3, x);
+    __m256i gh4 = _mm256_mullo_epi16(gh3, x);
+    __m256i bh4 = _mm256_mullo_epi16(bh3, x);
+
+    __m256i y = _mm256_set_epi16(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0);
+
+    __m256i rv4 = _mm256_mullo_epi16(rv3, y);
+    __m256i gv4 = _mm256_mullo_epi16(gv3, y);
+    __m256i bv4 = _mm256_mullo_epi16(bv3, y);
+
+    __m256i rxy = _mm256_add_epi16(rh4, rv4);
+    __m256i gxy = _mm256_add_epi16(gh4, gv4);
+    __m256i bxy = _mm256_add_epi16(bh4, bv4);
+
+    __m256i rp0 = _mm256_add_epi16(rxy, ro3);
+    __m256i gp0 = _mm256_add_epi16(gxy, go3);
+    __m256i bp0 = _mm256_add_epi16(bxy, bo3);
+
+    __m256i rp1 = _mm256_srai_epi16(rp0, 2);
+    __m256i gp1 = _mm256_srai_epi16(gp0, 2);
+    __m256i bp1 = _mm256_srai_epi16(bp0, 2);
+
+    __m256i rp2 = _mm256_max_epi16(_mm256_min_epi16(rp1, _mm256_set1_epi16(255)), _mm256_setzero_si256());
+    __m256i gp2 = _mm256_max_epi16(_mm256_min_epi16(gp1, _mm256_set1_epi16(255)), _mm256_setzero_si256());
+    __m256i bp2 = _mm256_max_epi16(_mm256_min_epi16(bp1, _mm256_set1_epi16(255)), _mm256_setzero_si256());
+
+    __m256i rdif = _mm256_sub_epi16(r08, rp2);
+    __m256i gdif = _mm256_sub_epi16(g08, gp2);
+    __m256i bdif = _mm256_sub_epi16(b08, bp2);
+
+    __m256i rerr = _mm256_mullo_epi16(rdif, _mm256_set1_epi16(38));
+    __m256i gerr = _mm256_mullo_epi16(gdif, _mm256_set1_epi16(76));
+    __m256i berr = _mm256_mullo_epi16(bdif, _mm256_set1_epi16(14));
+
+    __m256i sum0 = _mm256_add_epi16(rerr, gerr);
+    __m256i sum1 = _mm256_add_epi16(sum0, berr);
+
+    __m256i sum2 = _mm256_madd_epi16(sum1, sum1);
+
+    __m128i sum3 = _mm_add_epi32(_mm256_castsi256_si128(sum2), _mm256_extracti128_si256(sum2, 1));
+
+    uint32_t err0 = _mm_extract_epi32(sum3, 0);
+    uint32_t err1 = _mm_extract_epi32(sum3, 1);
+    uint32_t err2 = _mm_extract_epi32(sum3, 2);
+    uint32_t err3 = _mm_extract_epi32(sum3, 3);
+
+    uint64_t error = err0 + err1 + err2 + err3;
+    /**/
+
+    uint32_t rgbv = ( rgbv0 & 0x3F ) | ( ( rgbv0 >> 2 ) & 0x1FC0 ) | ( ( rgbv0 >> 3 ) & 0x7E000 );
+    uint64_t rgbho0_ = ( rgbho & 0x3F0000003F ) | ( ( rgbho >> 2 ) & 0x1FC000001FC0 ) | ( ( rgbho >> 3 ) & 0x7E0000007E000 );
+    uint64_t rgbho0 = ( rgbho0_ & 0x7FFFF ) | ( ( rgbho0_ >> 13 ) & 0x3FFFF80000 );
+
+    uint32_t hi = rgbv | ((rgbho0 & 0x1FFF) << 19);
+    rgbho0 >>= 13;
+    uint32_t lo = ( rgbho0 & 0x1 ) | ( ( rgbho0 & 0x1FE ) << 1 ) | ( ( rgbho0 & 0x600 ) << 2 ) | ( ( rgbho0 & 0x3F800 ) << 5 ) | ( ( rgbho0 & 0x1FC0000 ) << 6 );
+
+    uint32_t idx = ( ( rgbho >> 33 ) & 0xF ) | ( ( rgbho >> 41 ) & 0x10 ) | ( ( rgbho >> 48 ) & 0x20 );
+    lo |= g_flags[idx];
+    uint64_t result = static_cast<uint32_t>(_bswap(lo));
+    result |= static_cast<uint64_t>(static_cast<uint32_t>(_bswap(hi))) << 32;
+
+    Plane plane;
+
+    plane.plane = result;
+    plane.error = error;
+    plane.sum4 = _mm256_permute4x64_epi64(srgb, _MM_SHUFFLE(2, 3, 0, 1));
+
+    return plane;
+}
+
+static etcpak_force_inline uint64_t EncodeSelectors_AVX2( uint64_t d, const uint32_t terr[2][8], const uint32_t tsel[8], const bool rotate, const uint64_t value, const uint32_t error) noexcept
+{
+    size_t tidx[2];
+
+    // Get index of minimum error (terr[0] and terr[1])
+    __m256i err0 = _mm256_load_si256((const __m256i*)terr[0]);
+    __m256i err1 = _mm256_load_si256((const __m256i*)terr[1]);
+
+    __m256i errLo = _mm256_permute2x128_si256(err0, err1, (0) | (2 << 4));
+    __m256i errHi = _mm256_permute2x128_si256(err0, err1, (1) | (3 << 4));
+
+    __m256i errMin0 = _mm256_min_epu32(errLo, errHi);
+
+    __m256i errMin1 = _mm256_shuffle_epi32(errMin0, _MM_SHUFFLE(2, 3, 0, 1));
+    __m256i errMin2 = _mm256_min_epu32(errMin0, errMin1);
+
+    __m256i errMin3 = _mm256_shuffle_epi32(errMin2, _MM_SHUFFLE(1, 0, 3, 2));
+    __m256i errMin4 = _mm256_min_epu32(errMin3, errMin2);
+
+    __m256i errMin5 = _mm256_permute2x128_si256(errMin4, errMin4, (0) | (0 << 4));
+    __m256i errMin6 = _mm256_permute2x128_si256(errMin4, errMin4, (1) | (1 << 4));
+
+    __m256i errMask0 = _mm256_cmpeq_epi32(errMin5, err0);
+    __m256i errMask1 = _mm256_cmpeq_epi32(errMin6, err1);
+
+    uint32_t mask0 = _mm256_movemask_epi8(errMask0);
+    uint32_t mask1 = _mm256_movemask_epi8(errMask1);
+
+    tidx[0] = _bit_scan_forward(mask0) >> 2;
+    tidx[1] = _bit_scan_forward(mask1) >> 2;
+
+    if ((terr[0][tidx[0]] + terr[1][tidx[1]]) >= error)
+    {
+        return value;
+    }
+
+    d |= tidx[0] << 26;
+    d |= tidx[1] << 29;
+
+    unsigned int t0 = tsel[tidx[0]];
+    unsigned int t1 = tsel[tidx[1]];
+
+    if (!rotate)
+    {
+        t0 &= 0xFF00FF00;
+        t1 &= 0x00FF00FF;
+    }
+    else
+    {
+        t0 &= 0xCCCCCCCC;
+        t1 &= 0x33333333;
+    }
+
+    // Flip selectors from sign bit
+    unsigned int t2 = (t0 | t1) ^ 0xFFFF0000;
+
+    return d | static_cast<uint64_t>(_bswap(t2)) << 32;
+}
+
+#endif
+
+static etcpak_force_inline void Average( const uint8_t* data, v4i* a )
+{
+#ifdef __SSE4_1__
+    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
+    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
+
+    __m128i d0l = _mm_unpacklo_epi8(d0, _mm_setzero_si128());
+    __m128i d0h = _mm_unpackhi_epi8(d0, _mm_setzero_si128());
+    __m128i d1l = _mm_unpacklo_epi8(d1, _mm_setzero_si128());
+    __m128i d1h = _mm_unpackhi_epi8(d1, _mm_setzero_si128());
+    __m128i d2l = _mm_unpacklo_epi8(d2, _mm_setzero_si128());
+    __m128i d2h = _mm_unpackhi_epi8(d2, _mm_setzero_si128());
+    __m128i d3l = _mm_unpacklo_epi8(d3, _mm_setzero_si128());
+    __m128i d3h = _mm_unpackhi_epi8(d3, _mm_setzero_si128());
+
+    __m128i sum0 = _mm_add_epi16(d0l, d1l);
+    __m128i sum1 = _mm_add_epi16(d0h, d1h);
+    __m128i sum2 = _mm_add_epi16(d2l, d3l);
+    __m128i sum3 = _mm_add_epi16(d2h, d3h);
+
+    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
+    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
+    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
+    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
+    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
+    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
+    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
+    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
+
+    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
+    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
+    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
+    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
+
+    __m128i a0 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b2, b3), _mm_set1_epi32(4)), 3);
+    __m128i a1 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b1), _mm_set1_epi32(4)), 3);
+    __m128i a2 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b1, b3), _mm_set1_epi32(4)), 3);
+    __m128i a3 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b2), _mm_set1_epi32(4)), 3);
+
+    _mm_storeu_si128((__m128i*)&a[0], _mm_packus_epi32(_mm_shuffle_epi32(a0, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a1, _MM_SHUFFLE(3, 0, 1, 2))));
+    _mm_storeu_si128((__m128i*)&a[2], _mm_packus_epi32(_mm_shuffle_epi32(a2, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a3, _MM_SHUFFLE(3, 0, 1, 2))));
+#elif defined __ARM_NEON
+    uint8x16x2_t t0 = vzipq_u8(vld1q_u8(data +  0), uint8x16_t());
+    uint8x16x2_t t1 = vzipq_u8(vld1q_u8(data + 16), uint8x16_t());
+    uint8x16x2_t t2 = vzipq_u8(vld1q_u8(data + 32), uint8x16_t());
+    uint8x16x2_t t3 = vzipq_u8(vld1q_u8(data + 48), uint8x16_t());
+
+    uint16x8x2_t d0 = { vreinterpretq_u16_u8(t0.val[0]), vreinterpretq_u16_u8(t0.val[1]) };
+    uint16x8x2_t d1 = { vreinterpretq_u16_u8(t1.val[0]), vreinterpretq_u16_u8(t1.val[1]) };
+    uint16x8x2_t d2 = { vreinterpretq_u16_u8(t2.val[0]), vreinterpretq_u16_u8(t2.val[1]) };
+    uint16x8x2_t d3 = { vreinterpretq_u16_u8(t3.val[0]), vreinterpretq_u16_u8(t3.val[1]) };
+
+    uint16x8x2_t s0 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d0.val[0] ), vreinterpretq_s16_u16( d1.val[0] ) ) ), uint16x8_t());
+    uint16x8x2_t s1 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d0.val[1] ), vreinterpretq_s16_u16( d1.val[1] ) ) ), uint16x8_t());
+    uint16x8x2_t s2 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d2.val[0] ), vreinterpretq_s16_u16( d3.val[0] ) ) ), uint16x8_t());
+    uint16x8x2_t s3 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d2.val[1] ), vreinterpretq_s16_u16( d3.val[1] ) ) ), uint16x8_t());
+
+    uint32x4x2_t sum0 = { vreinterpretq_u32_u16(s0.val[0]), vreinterpretq_u32_u16(s0.val[1]) };
+    uint32x4x2_t sum1 = { vreinterpretq_u32_u16(s1.val[0]), vreinterpretq_u32_u16(s1.val[1]) };
+    uint32x4x2_t sum2 = { vreinterpretq_u32_u16(s2.val[0]), vreinterpretq_u32_u16(s2.val[1]) };
+    uint32x4x2_t sum3 = { vreinterpretq_u32_u16(s3.val[0]), vreinterpretq_u32_u16(s3.val[1]) };
+
+    uint32x4_t b0 = vaddq_u32(sum0.val[0], sum0.val[1]);
+    uint32x4_t b1 = vaddq_u32(sum1.val[0], sum1.val[1]);
+    uint32x4_t b2 = vaddq_u32(sum2.val[0], sum2.val[1]);
+    uint32x4_t b3 = vaddq_u32(sum3.val[0], sum3.val[1]);
+
+    uint32x4_t a0 = vshrq_n_u32(vqaddq_u32(vqaddq_u32(b2, b3), vdupq_n_u32(4)), 3);
+    uint32x4_t a1 = vshrq_n_u32(vqaddq_u32(vqaddq_u32(b0, b1), vdupq_n_u32(4)), 3);
+    uint32x4_t a2 = vshrq_n_u32(vqaddq_u32(vqaddq_u32(b1, b3), vdupq_n_u32(4)), 3);
+    uint32x4_t a3 = vshrq_n_u32(vqaddq_u32(vqaddq_u32(b0, b2), vdupq_n_u32(4)), 3);
+
+    uint16x8_t o0 = vcombine_u16(vqmovun_s32(vreinterpretq_s32_u32( a0 )), vqmovun_s32(vreinterpretq_s32_u32( a1 )));
+    uint16x8_t o1 = vcombine_u16(vqmovun_s32(vreinterpretq_s32_u32( a2 )), vqmovun_s32(vreinterpretq_s32_u32( a3 )));
+
+    a[0] = v4i{o0[2], o0[1], o0[0], 0};
+    a[1] = v4i{o0[6], o0[5], o0[4], 0};
+    a[2] = v4i{o1[2], o1[1], o1[0], 0};
+    a[3] = v4i{o1[6], o1[5], o1[4], 0};
+#else
+    uint32_t r[4];
+    uint32_t g[4];
+    uint32_t b[4];
+
+    memset(r, 0, sizeof(r));
+    memset(g, 0, sizeof(g));
+    memset(b, 0, sizeof(b));
+
+    for( int j=0; j<4; j++ )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            int index = (j & 2) + (i >> 1);
+            b[index] += *data++;
+            g[index] += *data++;
+            r[index] += *data++;
+            data++;
+        }
+    }
+
+    a[0] = v4i{ uint16_t( (r[2] + r[3] + 4) / 8 ), uint16_t( (g[2] + g[3] + 4) / 8 ), uint16_t( (b[2] + b[3] + 4) / 8 ), 0};
+    a[1] = v4i{ uint16_t( (r[0] + r[1] + 4) / 8 ), uint16_t( (g[0] + g[1] + 4) / 8 ), uint16_t( (b[0] + b[1] + 4) / 8 ), 0};
+    a[2] = v4i{ uint16_t( (r[1] + r[3] + 4) / 8 ), uint16_t( (g[1] + g[3] + 4) / 8 ), uint16_t( (b[1] + b[3] + 4) / 8 ), 0};
+    a[3] = v4i{ uint16_t( (r[0] + r[2] + 4) / 8 ), uint16_t( (g[0] + g[2] + 4) / 8 ), uint16_t( (b[0] + b[2] + 4) / 8 ), 0};
+#endif
+}
+
+static etcpak_force_inline void CalcErrorBlock( const uint8_t* data, unsigned int err[4][4] )
+{
+#ifdef __SSE4_1__
+    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
+    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
+
+    __m128i dm0 = _mm_and_si128(d0, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm1 = _mm_and_si128(d1, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm2 = _mm_and_si128(d2, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm3 = _mm_and_si128(d3, _mm_set1_epi32(0x00FFFFFF));
+
+    __m128i d0l = _mm_unpacklo_epi8(dm0, _mm_setzero_si128());
+    __m128i d0h = _mm_unpackhi_epi8(dm0, _mm_setzero_si128());
+    __m128i d1l = _mm_unpacklo_epi8(dm1, _mm_setzero_si128());
+    __m128i d1h = _mm_unpackhi_epi8(dm1, _mm_setzero_si128());
+    __m128i d2l = _mm_unpacklo_epi8(dm2, _mm_setzero_si128());
+    __m128i d2h = _mm_unpackhi_epi8(dm2, _mm_setzero_si128());
+    __m128i d3l = _mm_unpacklo_epi8(dm3, _mm_setzero_si128());
+    __m128i d3h = _mm_unpackhi_epi8(dm3, _mm_setzero_si128());
+
+    __m128i sum0 = _mm_add_epi16(d0l, d1l);
+    __m128i sum1 = _mm_add_epi16(d0h, d1h);
+    __m128i sum2 = _mm_add_epi16(d2l, d3l);
+    __m128i sum3 = _mm_add_epi16(d2h, d3h);
+
+    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
+    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
+    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
+    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
+    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
+    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
+    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
+    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
+
+    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
+    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
+    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
+    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
+
+    __m128i a0 = _mm_add_epi32(b2, b3);
+    __m128i a1 = _mm_add_epi32(b0, b1);
+    __m128i a2 = _mm_add_epi32(b1, b3);
+    __m128i a3 = _mm_add_epi32(b0, b2);
+
+    _mm_storeu_si128((__m128i*)&err[0], a0);
+    _mm_storeu_si128((__m128i*)&err[1], a1);
+    _mm_storeu_si128((__m128i*)&err[2], a2);
+    _mm_storeu_si128((__m128i*)&err[3], a3);
+#elif defined __ARM_NEON
+    uint8x16x2_t t0 = vzipq_u8(vld1q_u8(data +  0), uint8x16_t());
+    uint8x16x2_t t1 = vzipq_u8(vld1q_u8(data + 16), uint8x16_t());
+    uint8x16x2_t t2 = vzipq_u8(vld1q_u8(data + 32), uint8x16_t());
+    uint8x16x2_t t3 = vzipq_u8(vld1q_u8(data + 48), uint8x16_t());
+
+    uint16x8x2_t d0 = { vreinterpretq_u16_u8(t0.val[0]), vreinterpretq_u16_u8(t0.val[1]) };
+    uint16x8x2_t d1 = { vreinterpretq_u16_u8(t1.val[0]), vreinterpretq_u16_u8(t1.val[1]) };
+    uint16x8x2_t d2 = { vreinterpretq_u16_u8(t2.val[0]), vreinterpretq_u16_u8(t2.val[1]) };
+    uint16x8x2_t d3 = { vreinterpretq_u16_u8(t3.val[0]), vreinterpretq_u16_u8(t3.val[1]) };
+
+    uint16x8x2_t s0 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d0.val[0] ), vreinterpretq_s16_u16( d1.val[0] ))), uint16x8_t());
+    uint16x8x2_t s1 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d0.val[1] ), vreinterpretq_s16_u16( d1.val[1] ))), uint16x8_t());
+    uint16x8x2_t s2 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d2.val[0] ), vreinterpretq_s16_u16( d3.val[0] ))), uint16x8_t());
+    uint16x8x2_t s3 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d2.val[1] ), vreinterpretq_s16_u16( d3.val[1] ))), uint16x8_t());
+
+    uint32x4x2_t sum0 = { vreinterpretq_u32_u16(s0.val[0]), vreinterpretq_u32_u16(s0.val[1]) };
+    uint32x4x2_t sum1 = { vreinterpretq_u32_u16(s1.val[0]), vreinterpretq_u32_u16(s1.val[1]) };
+    uint32x4x2_t sum2 = { vreinterpretq_u32_u16(s2.val[0]), vreinterpretq_u32_u16(s2.val[1]) };
+    uint32x4x2_t sum3 = { vreinterpretq_u32_u16(s3.val[0]), vreinterpretq_u32_u16(s3.val[1]) };
+
+    uint32x4_t b0 = vaddq_u32(sum0.val[0], sum0.val[1]);
+    uint32x4_t b1 = vaddq_u32(sum1.val[0], sum1.val[1]);
+    uint32x4_t b2 = vaddq_u32(sum2.val[0], sum2.val[1]);
+    uint32x4_t b3 = vaddq_u32(sum3.val[0], sum3.val[1]);
+
+    uint32x4_t a0 = vreinterpretq_u32_u8( vandq_u8(vreinterpretq_u8_u32( vqaddq_u32(b2, b3) ), vreinterpretq_u8_u32( vdupq_n_u32(0x00FFFFFF)) ) );
+    uint32x4_t a1 = vreinterpretq_u32_u8( vandq_u8(vreinterpretq_u8_u32( vqaddq_u32(b0, b1) ), vreinterpretq_u8_u32( vdupq_n_u32(0x00FFFFFF)) ) );
+    uint32x4_t a2 = vreinterpretq_u32_u8( vandq_u8(vreinterpretq_u8_u32( vqaddq_u32(b1, b3) ), vreinterpretq_u8_u32( vdupq_n_u32(0x00FFFFFF)) ) );
+    uint32x4_t a3 = vreinterpretq_u32_u8( vandq_u8(vreinterpretq_u8_u32( vqaddq_u32(b0, b2) ), vreinterpretq_u8_u32( vdupq_n_u32(0x00FFFFFF)) ) );
+
+    vst1q_u32(err[0], a0);
+    vst1q_u32(err[1], a1);
+    vst1q_u32(err[2], a2);
+    vst1q_u32(err[3], a3);
+#else
+    unsigned int terr[4][4];
+
+    memset(terr, 0, 16 * sizeof(unsigned int));
+
+    for( int j=0; j<4; j++ )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            int index = (j & 2) + (i >> 1);
+            unsigned int d = *data++;
+            terr[index][0] += d;
+            d = *data++;
+            terr[index][1] += d;
+            d = *data++;
+            terr[index][2] += d;
+            data++;
+        }
+    }
+
+    for( int i=0; i<3; i++ )
+    {
+        err[0][i] = terr[2][i] + terr[3][i];
+        err[1][i] = terr[0][i] + terr[1][i];
+        err[2][i] = terr[1][i] + terr[3][i];
+        err[3][i] = terr[0][i] + terr[2][i];
+    }
+    for( int i=0; i<4; i++ )
+    {
+        err[i][3] = 0;
+    }
+#endif
+}
+
+static etcpak_force_inline unsigned int CalcError( const unsigned int block[4], const v4i& average )
+{
+    unsigned int err = 0x3FFFFFFF; // Big value to prevent negative values, but small enough to prevent overflow
+    err -= block[0] * 2 * average[2];
+    err -= block[1] * 2 * average[1];
+    err -= block[2] * 2 * average[0];
+    err += 8 * ( sq( average[0] ) + sq( average[1] ) + sq( average[2] ) );
+    return err;
+}
+
+static etcpak_force_inline void ProcessAverages( v4i* a )
+{
+#ifdef __SSE4_1__
+    for( int i=0; i<2; i++ )
+    {
+        __m128i d = _mm_loadu_si128((__m128i*)a[i*2].data());
+
+        __m128i t = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(31)), _mm_set1_epi16(128));
+
+        __m128i c = _mm_srli_epi16(_mm_add_epi16(t, _mm_srli_epi16(t, 8)), 8);
+
+        __m128i c1 = _mm_shuffle_epi32(c, _MM_SHUFFLE(3, 2, 3, 2));
+        __m128i diff = _mm_sub_epi16(c, c1);
+        diff = _mm_max_epi16(diff, _mm_set1_epi16(-4));
+        diff = _mm_min_epi16(diff, _mm_set1_epi16(3));
+
+        __m128i co = _mm_add_epi16(c1, diff);
+
+        c = _mm_blend_epi16(co, c, 0xF0);
+
+        __m128i a0 = _mm_or_si128(_mm_slli_epi16(c, 3), _mm_srli_epi16(c, 2));
+
+        _mm_storeu_si128((__m128i*)a[4+i*2].data(), a0);
+    }
+
+    for( int i=0; i<2; i++ )
+    {
+        __m128i d = _mm_loadu_si128((__m128i*)a[i*2].data());
+
+        __m128i t0 = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(15)), _mm_set1_epi16(128));
+        __m128i t1 = _mm_srli_epi16(_mm_add_epi16(t0, _mm_srli_epi16(t0, 8)), 8);
+
+        __m128i t2 = _mm_or_si128(t1, _mm_slli_epi16(t1, 4));
+
+        _mm_storeu_si128((__m128i*)a[i*2].data(), t2);
+    }
+#elif defined __ARM_NEON
+    for( int i=0; i<2; i++ )
+    {
+        int16x8_t d = vld1q_s16((int16_t*)&a[i*2]);
+        int16x8_t t = vaddq_s16(vmulq_s16(d, vdupq_n_s16(31)), vdupq_n_s16(128));
+        int16x8_t c = vshrq_n_s16(vaddq_s16(t, vshrq_n_s16(t, 8)), 8);
+
+        int16x8_t c1 = vcombine_s16(vget_high_s16(c), vget_high_s16(c));
+        int16x8_t diff = vsubq_s16(c, c1);
+        diff = vmaxq_s16(diff, vdupq_n_s16(-4));
+        diff = vminq_s16(diff, vdupq_n_s16(3));
+
+        int16x8_t co = vaddq_s16(c1, diff);
+
+        c = vcombine_s16(vget_low_s16(co), vget_high_s16(c));
+
+        int16x8_t a0 = vorrq_s16(vshlq_n_s16(c, 3), vshrq_n_s16(c, 2));
+
+        vst1q_s16((int16_t*)&a[4+i*2], a0);
+    }
+
+    for( int i=0; i<2; i++ )
+    {
+        int16x8_t d = vld1q_s16((int16_t*)&a[i*2]);
+
+        int16x8_t t0 = vaddq_s16(vmulq_s16(d, vdupq_n_s16(15)), vdupq_n_s16(128));
+        int16x8_t t1 = vshrq_n_s16(vaddq_s16(t0, vshrq_n_s16(t0, 8)), 8);
+
+        int16x8_t t2 = vorrq_s16(t1, vshlq_n_s16(t1, 4));
+
+        vst1q_s16((int16_t*)&a[i*2], t2);
+    }
+#else
+    for( int i=0; i<2; i++ )
+    {
+        for( int j=0; j<3; j++ )
+        {
+            int32_t c1 = mul8bit( a[i*2+1][j], 31 );
+            int32_t c2 = mul8bit( a[i*2][j], 31 );
+
+            int32_t diff = c2 - c1;
+            if( diff > 3 ) diff = 3;
+            else if( diff < -4 ) diff = -4;
+
+            int32_t co = c1 + diff;
+
+            a[5+i*2][j] = ( c1 << 3 ) | ( c1 >> 2 );
+            a[4+i*2][j] = ( co << 3 ) | ( co >> 2 );
+        }
+    }
+
+    for( int i=0; i<4; i++ )
+    {
+        a[i][0] = g_avg2[mul8bit( a[i][0], 15 )];
+        a[i][1] = g_avg2[mul8bit( a[i][1], 15 )];
+        a[i][2] = g_avg2[mul8bit( a[i][2], 15 )];
+    }
+#endif
+}
+
+static etcpak_force_inline void EncodeAverages( uint64_t& _d, const v4i* a, size_t idx )
+{
+    auto d = _d;
+    d |= ( idx << 24 );
+    size_t base = idx << 1;
+
+    if( ( idx & 0x2 ) == 0 )
+    {
+        for( int i=0; i<3; i++ )
+        {
+            d |= uint64_t( a[base+0][i] >> 4 ) << ( i*8 );
+            d |= uint64_t( a[base+1][i] >> 4 ) << ( i*8 + 4 );
+        }
+    }
+    else
+    {
+        for( int i=0; i<3; i++ )
+        {
+            d |= uint64_t( a[base+1][i] & 0xF8 ) << ( i*8 );
+            int32_t c = ( ( a[base+0][i] & 0xF8 ) - ( a[base+1][i] & 0xF8 ) ) >> 3;
+            c &= ~0xFFFFFFF8;
+            d |= ((uint64_t)c) << ( i*8 );
+        }
+    }
+    _d = d;
+}
+
+static etcpak_force_inline uint64_t CheckSolid( const uint8_t* src )
+{
+#ifdef __SSE4_1__
+    __m128i d0 = _mm_loadu_si128(((__m128i*)src) + 0);
+    __m128i d1 = _mm_loadu_si128(((__m128i*)src) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)src) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)src) + 3);
+
+    __m128i c = _mm_shuffle_epi32(d0, _MM_SHUFFLE(0, 0, 0, 0));
+
+    __m128i c0 = _mm_cmpeq_epi8(d0, c);
+    __m128i c1 = _mm_cmpeq_epi8(d1, c);
+    __m128i c2 = _mm_cmpeq_epi8(d2, c);
+    __m128i c3 = _mm_cmpeq_epi8(d3, c);
+
+    __m128i m0 = _mm_and_si128(c0, c1);
+    __m128i m1 = _mm_and_si128(c2, c3);
+    __m128i m = _mm_and_si128(m0, m1);
+
+    if (!_mm_testc_si128(m, _mm_set1_epi32(-1)))
+    {
+        return 0;
+    }
+#elif defined __ARM_NEON
+    int32x4_t d0 = vld1q_s32((int32_t*)src +  0);
+    int32x4_t d1 = vld1q_s32((int32_t*)src +  4);
+    int32x4_t d2 = vld1q_s32((int32_t*)src +  8);
+    int32x4_t d3 = vld1q_s32((int32_t*)src + 12);
+
+    int32x4_t c = vdupq_n_s32(d0[0]);
+
+    int32x4_t c0 = vreinterpretq_s32_u32(vceqq_s32(d0, c));
+    int32x4_t c1 = vreinterpretq_s32_u32(vceqq_s32(d1, c));
+    int32x4_t c2 = vreinterpretq_s32_u32(vceqq_s32(d2, c));
+    int32x4_t c3 = vreinterpretq_s32_u32(vceqq_s32(d3, c));
+
+    int32x4_t m0 = vandq_s32(c0, c1);
+    int32x4_t m1 = vandq_s32(c2, c3);
+    int64x2_t m = vreinterpretq_s64_s32(vandq_s32(m0, m1));
+
+    if (m[0] != -1 || m[1] != -1)
+    {
+        return 0;
+    }
+#else
+    const uint8_t* ptr = src + 4;
+    for( int i=1; i<16; i++ )
+    {
+        if( memcmp( src, ptr, 4 ) != 0 )
+        {
+            return 0;
+        }
+        ptr += 4;
+    }
+#endif
+    return 0x02000000 |
+        ( (unsigned int)( src[0] & 0xF8 ) << 16 ) |
+        ( (unsigned int)( src[1] & 0xF8 ) << 8 ) |
+        ( (unsigned int)( src[2] & 0xF8 ) );
+}
+
+static etcpak_force_inline void PrepareAverages( v4i a[8], const uint8_t* src, unsigned int err[4] )
+{
+    Average( src, a );
+    ProcessAverages( a );
+
+    unsigned int errblock[4][4];
+    CalcErrorBlock( src, errblock );
+
+    for( int i=0; i<4; i++ )
+    {
+        err[i/2] += CalcError( errblock[i], a[i] );
+        err[2+i/2] += CalcError( errblock[i], a[i+4] );
+    }
+}
+
+static etcpak_force_inline void FindBestFit( uint64_t terr[2][8], uint16_t tsel[16][8], v4i a[8], const uint32_t* id, const uint8_t* data )
+{
+    for( size_t i=0; i<16; i++ )
+    {
+        uint16_t* sel = tsel[i];
+        unsigned int bid = id[i];
+        uint64_t* ter = terr[bid%2];
+
+        uint8_t b = *data++;
+        uint8_t g = *data++;
+        uint8_t r = *data++;
+        data++;
+
+        int dr = a[bid][0] - r;
+        int dg = a[bid][1] - g;
+        int db = a[bid][2] - b;
+
+#ifdef __SSE4_1__
+        // Reference implementation
+
+        __m128i pix = _mm_set1_epi32(dr * 77 + dg * 151 + db * 28);
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        __m128i error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[0]));
+        __m128i error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[1]));
+        __m128i error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[0]));
+        __m128i error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[1]));
+
+        __m128i index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
+        __m128i minError0 = _mm_min_epi32(error0, error1);
+
+        __m128i index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
+        __m128i minError1 = _mm_min_epi32(error2, error3);
+
+        __m128i minIndex0 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
+        __m128i minError = _mm_min_epi32(minError0, minError1);
+
+        // Squaring the minimum error to produce correct values when adding
+        __m128i minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
+        __m128i squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
+        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
+        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
+        __m128i minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
+        __m128i squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
+        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
+        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
+
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[2]));
+        error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[3]));
+        error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[2]));
+        error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[3]));
+
+        index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
+        minError0 = _mm_min_epi32(error0, error1);
+
+        index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
+        minError1 = _mm_min_epi32(error2, error3);
+
+        __m128i minIndex1 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
+        minError = _mm_min_epi32(minError0, minError1);
+
+        // Squaring the minimum error to produce correct values when adding
+        minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
+        squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
+        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 2));
+        _mm_storeu_si128(((__m128i*)ter) + 2, squareErrorLow);
+        minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
+        squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
+        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 3));
+        _mm_storeu_si128(((__m128i*)ter) + 3, squareErrorHigh);
+        __m128i minIndex = _mm_packs_epi32(minIndex0, minIndex1);
+        _mm_storeu_si128((__m128i*)sel, minIndex);
+#elif defined __ARM_NEON
+        int32x4_t pix = vdupq_n_s32(dr * 77 + dg * 151 + db * 28);
+
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        uint32x4_t error0 = vreinterpretq_u32_s32(vabsq_s32(vaddq_s32(pix, g_table256_NEON[0])));
+        uint32x4_t error1 = vreinterpretq_u32_s32(vabsq_s32(vaddq_s32(pix, g_table256_NEON[1])));
+        uint32x4_t error2 = vreinterpretq_u32_s32(vabsq_s32(vsubq_s32(pix, g_table256_NEON[0])));
+        uint32x4_t error3 = vreinterpretq_u32_s32(vabsq_s32(vsubq_s32(pix, g_table256_NEON[1])));
+
+        uint32x4_t index0 = vandq_u32(vcltq_u32(error1, error0), vdupq_n_u32(1));
+        uint32x4_t minError0 = vminq_u32(error0, error1);
+
+        uint32x4_t index1 = vreinterpretq_u32_s32(vsubq_s32(vdupq_n_s32(2), vreinterpretq_s32_u32(vcltq_u32(error3, error2))));
+        uint32x4_t minError1 = vminq_u32(error2, error3);
+
+        uint32x4_t blendMask = vcltq_u32(minError1, minError0);
+        uint32x4_t minIndex0 = vorrq_u32(vbicq_u32(index0, blendMask), vandq_u32(index1, blendMask));
+        uint32x4_t minError = vminq_u32(minError0, minError1);
+
+        // Squaring the minimum error to produce correct values when adding
+        uint32x4_t squareErrorLow = vmulq_u32(minError, minError);
+        uint32x4_t squareErrorHigh = vshrq_n_u32(vreinterpretq_u32_s32(vqdmulhq_s32(vreinterpretq_s32_u32(minError), vreinterpretq_s32_u32(minError))), 1);
+        uint32x4x2_t squareErrorZip = vzipq_u32(squareErrorLow, squareErrorHigh);
+        uint64x2x2_t squareError = { vreinterpretq_u64_u32(squareErrorZip.val[0]), vreinterpretq_u64_u32(squareErrorZip.val[1]) };
+        squareError.val[0] = vaddq_u64(squareError.val[0], vld1q_u64(ter + 0));
+        squareError.val[1] = vaddq_u64(squareError.val[1], vld1q_u64(ter + 2));
+        vst1q_u64(ter + 0, squareError.val[0]);
+        vst1q_u64(ter + 2, squareError.val[1]);
+
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        error0 = vreinterpretq_u32_s32( vabsq_s32(vaddq_s32(pix, g_table256_NEON[2])));
+        error1 = vreinterpretq_u32_s32( vabsq_s32(vaddq_s32(pix, g_table256_NEON[3])));
+        error2 = vreinterpretq_u32_s32( vabsq_s32(vsubq_s32(pix, g_table256_NEON[2])));
+        error3 = vreinterpretq_u32_s32( vabsq_s32(vsubq_s32(pix, g_table256_NEON[3])));
+
+        index0 = vandq_u32(vcltq_u32(error1, error0), vdupq_n_u32(1));
+        minError0 = vminq_u32(error0, error1);
+
+        index1 = vreinterpretq_u32_s32( vsubq_s32(vdupq_n_s32(2), vreinterpretq_s32_u32(vcltq_u32(error3, error2))) );
+        minError1 = vminq_u32(error2, error3);
+
+        blendMask = vcltq_u32(minError1, minError0);
+        uint32x4_t minIndex1 = vorrq_u32(vbicq_u32(index0, blendMask), vandq_u32(index1, blendMask));
+        minError = vminq_u32(minError0, minError1);
+
+        // Squaring the minimum error to produce correct values when adding
+        squareErrorLow = vmulq_u32(minError, minError);
+        squareErrorHigh = vshrq_n_u32(vreinterpretq_u32_s32( vqdmulhq_s32(vreinterpretq_s32_u32(minError), vreinterpretq_s32_u32(minError)) ), 1 );
+        squareErrorZip = vzipq_u32(squareErrorLow, squareErrorHigh);
+        squareError.val[0] = vaddq_u64(vreinterpretq_u64_u32( squareErrorZip.val[0] ), vld1q_u64(ter + 4));
+        squareError.val[1] = vaddq_u64(vreinterpretq_u64_u32( squareErrorZip.val[1] ), vld1q_u64(ter + 6));
+        vst1q_u64(ter + 4, squareError.val[0]);
+        vst1q_u64(ter + 6, squareError.val[1]);
+
+        uint16x8_t minIndex = vcombine_u16(vqmovn_u32(minIndex0), vqmovn_u32(minIndex1));
+        vst1q_u16(sel, minIndex);
+#else
+        int pix = dr * 77 + dg * 151 + db * 28;
+
+        for( int t=0; t<8; t++ )
+        {
+            const int64_t* tab = g_table256[t];
+            unsigned int idx = 0;
+            uint64_t err = sq( tab[0] + pix );
+            for( int j=1; j<4; j++ )
+            {
+                uint64_t local = sq( tab[j] + pix );
+                if( local < err )
+                {
+                    err = local;
+                    idx = j;
+                }
+            }
+            *sel++ = idx;
+            *ter++ += err;
+        }
+#endif
+    }
+}
+
+#if defined __SSE4_1__ || defined __ARM_NEON
+// Non-reference implementation, but faster. Produces same results as the AVX2 version
+static etcpak_force_inline void FindBestFit( uint32_t terr[2][8], uint16_t tsel[16][8], v4i a[8], const uint32_t* id, const uint8_t* data )
+{
+    for( size_t i=0; i<16; i++ )
+    {
+        uint16_t* sel = tsel[i];
+        unsigned int bid = id[i];
+        uint32_t* ter = terr[bid%2];
+
+        uint8_t b = *data++;
+        uint8_t g = *data++;
+        uint8_t r = *data++;
+        data++;
+
+        int dr = a[bid][0] - r;
+        int dg = a[bid][1] - g;
+        int db = a[bid][2] - b;
+
+#ifdef __SSE4_1__
+        // The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
+        // This produces slightly different results, but is significant faster
+        __m128i pixel = _mm_set1_epi16(dr * 38 + dg * 76 + db * 14);
+        __m128i pix = _mm_abs_epi16(pixel);
+
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+        __m128i error0 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[0]));
+        __m128i error1 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[1]));
+
+        __m128i index = _mm_and_si128(_mm_cmplt_epi16(error1, error0), _mm_set1_epi16(1));
+        __m128i minError = _mm_min_epi16(error0, error1);
+
+        // Exploiting symmetry of the selector table and use the sign bit
+        // This produces slightly different results, but is needed to produce same results as AVX2 implementation
+        __m128i indexBit = _mm_andnot_si128(_mm_srli_epi16(pixel, 15), _mm_set1_epi8(-1));
+        __m128i minIndex = _mm_or_si128(index, _mm_add_epi16(indexBit, indexBit));
+
+        // Squaring the minimum error to produce correct values when adding
+        __m128i squareErrorLo = _mm_mullo_epi16(minError, minError);
+        __m128i squareErrorHi = _mm_mulhi_epi16(minError, minError);
+
+        __m128i squareErrorLow = _mm_unpacklo_epi16(squareErrorLo, squareErrorHi);
+        __m128i squareErrorHigh = _mm_unpackhi_epi16(squareErrorLo, squareErrorHi);
+
+        squareErrorLow = _mm_add_epi32(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
+        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
+        squareErrorHigh = _mm_add_epi32(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
+        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
+
+        _mm_storeu_si128((__m128i*)sel, minIndex);
+#elif defined __ARM_NEON
+        int16x8_t pixel = vdupq_n_s16( dr * 38 + dg * 76 + db * 14 );
+        int16x8_t pix = vabsq_s16( pixel );
+
+        int16x8_t error0 = vabsq_s16( vsubq_s16( pix, g_table128_NEON[0] ) );
+        int16x8_t error1 = vabsq_s16( vsubq_s16( pix, g_table128_NEON[1] ) );
+
+        int16x8_t index = vandq_s16( vreinterpretq_s16_u16( vcltq_s16( error1, error0 ) ), vdupq_n_s16( 1 ) );
+        int16x8_t minError = vminq_s16( error0, error1 );
+
+        int16x8_t indexBit = vandq_s16( vmvnq_s16( vshrq_n_s16( pixel, 15 ) ), vdupq_n_s16( -1 ) );
+        int16x8_t minIndex = vorrq_s16( index, vaddq_s16( indexBit, indexBit ) );
+
+        int16x4_t minErrorLow = vget_low_s16( minError );
+        int16x4_t minErrorHigh = vget_high_s16( minError );
+
+        int32x4_t squareErrorLow = vmull_s16( minErrorLow, minErrorLow );
+        int32x4_t squareErrorHigh = vmull_s16( minErrorHigh, minErrorHigh );
+
+        int32x4_t squareErrorSumLow = vaddq_s32( squareErrorLow, vld1q_s32( (int32_t*)ter ) );
+        int32x4_t squareErrorSumHigh = vaddq_s32( squareErrorHigh, vld1q_s32( (int32_t*)ter + 4 ) );
+
+        vst1q_s32( (int32_t*)ter, squareErrorSumLow );
+        vst1q_s32( (int32_t*)ter + 4, squareErrorSumHigh );
+
+        vst1q_s16( (int16_t*)sel, minIndex );
+#endif
+    }
+}
+#endif
+
+static etcpak_force_inline uint8_t convert6(float f)
+{
+    int i = (std::min(std::max(static_cast<int>(f), 0), 1023) - 15) >> 1;
+    return (i + 11 - ((i + 11) >> 7) - ((i + 4) >> 7)) >> 3;
+}
+
+static etcpak_force_inline uint8_t convert7(float f)
+{
+    int i = (std::min(std::max(static_cast<int>(f), 0), 1023) - 15) >> 1;
+    return (i + 9 - ((i + 9) >> 8) - ((i + 6) >> 8)) >> 2;
+}
+
+static etcpak_force_inline std::pair<uint64_t, uint64_t> Planar(const uint8_t* src)
+{
+    int32_t r = 0;
+    int32_t g = 0;
+    int32_t b = 0;
+
+    for (int i = 0; i < 16; ++i)
+    {
+        b += src[i * 4 + 0];
+        g += src[i * 4 + 1];
+        r += src[i * 4 + 2];
+    }
+
+    int32_t difRyz = 0;
+    int32_t difGyz = 0;
+    int32_t difByz = 0;
+    int32_t difRxz = 0;
+    int32_t difGxz = 0;
+    int32_t difBxz = 0;
+
+    const int32_t scaling[] = { -255, -85, 85, 255 };
+
+    for (int i = 0; i < 16; ++i)
+    {
+        int32_t difB = (static_cast<int>(src[i * 4 + 0]) << 4) - b;
+        int32_t difG = (static_cast<int>(src[i * 4 + 1]) << 4) - g;
+        int32_t difR = (static_cast<int>(src[i * 4 + 2]) << 4) - r;
+
+        difRyz += difR * scaling[i % 4];
+        difGyz += difG * scaling[i % 4];
+        difByz += difB * scaling[i % 4];
+
+        difRxz += difR * scaling[i / 4];
+        difGxz += difG * scaling[i / 4];
+        difBxz += difB * scaling[i / 4];
+    }
+
+    const float scale = -4.0f / ((255 * 255 * 8.0f + 85 * 85 * 8.0f) * 16.0f);
+
+    float aR = difRxz * scale;
+    float aG = difGxz * scale;
+    float aB = difBxz * scale;
+
+    float bR = difRyz * scale;
+    float bG = difGyz * scale;
+    float bB = difByz * scale;
+
+    float dR = r * (4.0f / 16.0f);
+    float dG = g * (4.0f / 16.0f);
+    float dB = b * (4.0f / 16.0f);
+
+    // calculating the three colors RGBO, RGBH, and RGBV.  RGB = df - af * x - bf * y;
+    float cofR = std::fma(aR,  255.0f, std::fma(bR,  255.0f, dR));
+    float cofG = std::fma(aG,  255.0f, std::fma(bG,  255.0f, dG));
+    float cofB = std::fma(aB,  255.0f, std::fma(bB,  255.0f, dB));
+    float chfR = std::fma(aR, -425.0f, std::fma(bR,  255.0f, dR));
+    float chfG = std::fma(aG, -425.0f, std::fma(bG,  255.0f, dG));
+    float chfB = std::fma(aB, -425.0f, std::fma(bB,  255.0f, dB));
+    float cvfR = std::fma(aR,  255.0f, std::fma(bR, -425.0f, dR));
+    float cvfG = std::fma(aG,  255.0f, std::fma(bG, -425.0f, dG));
+    float cvfB = std::fma(aB,  255.0f, std::fma(bB, -425.0f, dB));
+
+    // convert to r6g7b6
+    int32_t coR = convert6(cofR);
+    int32_t coG = convert7(cofG);
+    int32_t coB = convert6(cofB);
+    int32_t chR = convert6(chfR);
+    int32_t chG = convert7(chfG);
+    int32_t chB = convert6(chfB);
+    int32_t cvR = convert6(cvfR);
+    int32_t cvG = convert7(cvfG);
+    int32_t cvB = convert6(cvfB);
+
+    // Error calculation
+    auto ro0 = coR;
+    auto go0 = coG;
+    auto bo0 = coB;
+    auto ro1 = (ro0 >> 4) | (ro0 << 2);
+    auto go1 = (go0 >> 6) | (go0 << 1);
+    auto bo1 = (bo0 >> 4) | (bo0 << 2);
+    auto ro2 = (ro1 << 2) + 2;
+    auto go2 = (go1 << 2) + 2;
+    auto bo2 = (bo1 << 2) + 2;
+
+    auto rh0 = chR;
+    auto gh0 = chG;
+    auto bh0 = chB;
+    auto rh1 = (rh0 >> 4) | (rh0 << 2);
+    auto gh1 = (gh0 >> 6) | (gh0 << 1);
+    auto bh1 = (bh0 >> 4) | (bh0 << 2);
+
+    auto rh2 = rh1 - ro1;
+    auto gh2 = gh1 - go1;
+    auto bh2 = bh1 - bo1;
+
+    auto rv0 = cvR;
+    auto gv0 = cvG;
+    auto bv0 = cvB;
+    auto rv1 = (rv0 >> 4) | (rv0 << 2);
+    auto gv1 = (gv0 >> 6) | (gv0 << 1);
+    auto bv1 = (bv0 >> 4) | (bv0 << 2);
+
+    auto rv2 = rv1 - ro1;
+    auto gv2 = gv1 - go1;
+    auto bv2 = bv1 - bo1;
+
+    uint64_t error = 0;
+
+    for (int i = 0; i < 16; ++i)
+    {
+        int32_t cR = clampu8((rh2 * (i / 4) + rv2 * (i % 4) + ro2) >> 2);
+        int32_t cG = clampu8((gh2 * (i / 4) + gv2 * (i % 4) + go2) >> 2);
+        int32_t cB = clampu8((bh2 * (i / 4) + bv2 * (i % 4) + bo2) >> 2);
+
+        int32_t difB = static_cast<int>(src[i * 4 + 0]) - cB;
+        int32_t difG = static_cast<int>(src[i * 4 + 1]) - cG;
+        int32_t difR = static_cast<int>(src[i * 4 + 2]) - cR;
+
+        int32_t dif = difR * 38 + difG * 76 + difB * 14;
+
+        error += dif * dif;
+    }
+
+    /**/
+    uint32_t rgbv = cvB | (cvG << 6) | (cvR << 13);
+    uint32_t rgbh = chB | (chG << 6) | (chR << 13);
+    uint32_t hi = rgbv | ((rgbh & 0x1FFF) << 19);
+    uint32_t lo = (chR & 0x1) | 0x2 | ((chR << 1) & 0x7C);
+    lo |= ((coB & 0x07) <<  7) | ((coB & 0x18) <<  8) | ((coB & 0x20) << 11);
+    lo |= ((coG & 0x3F) << 17) | ((coG & 0x40) << 18);
+    lo |= coR << 25;
+
+    const auto idx = (coR & 0x20) | ((coG & 0x20) >> 1) | ((coB & 0x1E) >> 1);
+
+    lo |= g_flags[idx];
+
+    uint64_t result = static_cast<uint32_t>(_bswap(lo));
+    result |= static_cast<uint64_t>(static_cast<uint32_t>(_bswap(hi))) << 32;
+
+    return std::make_pair(result, error);
+}
+
+#ifdef __ARM_NEON
+
+static etcpak_force_inline int32x2_t Planar_NEON_DifXZ( int16x8_t dif_lo, int16x8_t dif_hi )
+{
+    int32x4_t dif0 = vmull_n_s16( vget_low_s16( dif_lo ), -255 );
+    int32x4_t dif1 = vmull_n_s16( vget_high_s16( dif_lo ), -85 );
+    int32x4_t dif2 = vmull_n_s16( vget_low_s16( dif_hi ), 85 );
+    int32x4_t dif3 = vmull_n_s16( vget_high_s16( dif_hi ), 255 );
+    int32x4_t dif4 = vaddq_s32( vaddq_s32( dif0, dif1 ), vaddq_s32( dif2, dif3 ) );
+
+#ifndef __aarch64__
+    int32x2_t dif5 = vpadd_s32( vget_low_s32( dif4 ), vget_high_s32( dif4 ) );
+    return vpadd_s32( dif5, dif5 );
+#else
+    return vdup_n_s32( vaddvq_s32( dif4 ) );
+#endif
+}
+
+static etcpak_force_inline int32x2_t Planar_NEON_DifYZ( int16x8_t dif_lo, int16x8_t dif_hi )
+{
+    int16x4_t scaling = { -255, -85, 85, 255 };
+    int32x4_t dif0 = vmull_s16( vget_low_s16( dif_lo ), scaling );
+    int32x4_t dif1 = vmull_s16( vget_high_s16( dif_lo ), scaling );
+    int32x4_t dif2 = vmull_s16( vget_low_s16( dif_hi ), scaling );
+    int32x4_t dif3 = vmull_s16( vget_high_s16( dif_hi ), scaling );
+    int32x4_t dif4 = vaddq_s32( vaddq_s32( dif0, dif1 ), vaddq_s32( dif2, dif3 ) );
+
+#ifndef __aarch64__
+    int32x2_t dif5 = vpadd_s32( vget_low_s32( dif4 ), vget_high_s32( dif4 ) );
+    return vpadd_s32( dif5, dif5 );
+#else
+    return vdup_n_s32( vaddvq_s32( dif4 ) );
+#endif
+}
+
+static etcpak_force_inline int16x8_t Planar_NEON_SumWide( uint8x16_t src )
+{
+    uint16x8_t accu8 = vpaddlq_u8( src );
+#ifndef __aarch64__
+    uint16x4_t accu4 = vpadd_u16( vget_low_u16( accu8 ), vget_high_u16( accu8 ) );
+    uint16x4_t accu2 = vpadd_u16( accu4, accu4 );
+    uint16x4_t accu1 = vpadd_u16( accu2, accu2 );
+    return vreinterpretq_s16_u16( vcombine_u16( accu1, accu1 ) );
+#else 
+    return vdupq_n_s16( vaddvq_u16( accu8 ) );
+#endif
+}
+
+static etcpak_force_inline int16x8_t convert6_NEON( int32x4_t lo, int32x4_t hi )
+{
+    uint16x8_t x = vcombine_u16( vqmovun_s32( lo ), vqmovun_s32( hi ) );
+    int16x8_t i = vreinterpretq_s16_u16( vshrq_n_u16( vqshlq_n_u16( x, 6 ), 6) ); // clamp 0-1023
+    i = vhsubq_s16( i, vdupq_n_s16( 15 ) );
+
+    int16x8_t ip11 = vaddq_s16( i, vdupq_n_s16( 11 ) );
+    int16x8_t ip4 = vaddq_s16( i, vdupq_n_s16( 4 ) );
+
+    return vshrq_n_s16( vsubq_s16( vsubq_s16( ip11, vshrq_n_s16( ip11, 7 ) ), vshrq_n_s16( ip4, 7) ), 3 );
+}
+
+static etcpak_force_inline int16x4_t convert7_NEON( int32x4_t x )
+{
+    int16x4_t i = vreinterpret_s16_u16( vshr_n_u16( vqshl_n_u16( vqmovun_s32( x ), 6 ), 6 ) ); // clamp 0-1023
+    i = vhsub_s16( i, vdup_n_s16( 15 ) );
+
+    int16x4_t p9 = vadd_s16( i, vdup_n_s16( 9 ) );
+    int16x4_t p6 = vadd_s16( i, vdup_n_s16( 6 ) );
+    return vshr_n_s16( vsub_s16( vsub_s16( p9, vshr_n_s16( p9, 8 ) ), vshr_n_s16( p6, 8 ) ), 2 );
+}
+
+static etcpak_force_inline std::pair<uint64_t, uint64_t> Planar_NEON( const uint8_t* src )
+{
+    uint8x16x4_t srcBlock = vld4q_u8( src );
+
+    int16x8_t bSumWide = Planar_NEON_SumWide( srcBlock.val[0] );
+    int16x8_t gSumWide = Planar_NEON_SumWide( srcBlock.val[1] );
+    int16x8_t rSumWide = Planar_NEON_SumWide( srcBlock.val[2] );
+
+    int16x8_t dif_R_lo = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_low_u8( srcBlock.val[2] ), 4) ), rSumWide );
+    int16x8_t dif_R_hi = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_high_u8( srcBlock.val[2] ), 4) ), rSumWide );
+
+    int16x8_t dif_G_lo = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_low_u8( srcBlock.val[1] ), 4 ) ), gSumWide );
+    int16x8_t dif_G_hi = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_high_u8( srcBlock.val[1] ), 4 ) ), gSumWide );
+
+    int16x8_t dif_B_lo = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_low_u8( srcBlock.val[0] ), 4) ), bSumWide );
+    int16x8_t dif_B_hi = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_high_u8( srcBlock.val[0] ), 4) ), bSumWide );
+
+    int32x2x2_t dif_xz_z = vzip_s32( vzip_s32( Planar_NEON_DifXZ( dif_B_lo, dif_B_hi ), Planar_NEON_DifXZ( dif_R_lo, dif_R_hi ) ).val[0], Planar_NEON_DifXZ( dif_G_lo, dif_G_hi ) );
+    int32x4_t dif_xz = vcombine_s32( dif_xz_z.val[0], dif_xz_z.val[1] );
+    int32x2x2_t dif_yz_z = vzip_s32( vzip_s32( Planar_NEON_DifYZ( dif_B_lo, dif_B_hi ), Planar_NEON_DifYZ( dif_R_lo, dif_R_hi ) ).val[0], Planar_NEON_DifYZ( dif_G_lo, dif_G_hi ) );
+    int32x4_t dif_yz = vcombine_s32( dif_yz_z.val[0], dif_yz_z.val[1] );
+
+    const float fscale = -4.0f / ( (255 * 255 * 8.0f + 85 * 85 * 8.0f ) * 16.0f );
+    float32x4_t fa = vmulq_n_f32( vcvtq_f32_s32( dif_xz ), fscale );
+    float32x4_t fb = vmulq_n_f32( vcvtq_f32_s32( dif_yz ), fscale );
+    int16x4_t bgrgSum = vzip_s16( vzip_s16( vget_low_s16( bSumWide ), vget_low_s16( rSumWide ) ).val[0], vget_low_s16( gSumWide ) ).val[0];
+    float32x4_t fd = vmulq_n_f32( vcvtq_f32_s32( vmovl_s16( bgrgSum ) ), 4.0f / 16.0f);
+
+    float32x4_t cof = vmlaq_n_f32( vmlaq_n_f32( fd, fb, 255.0f ), fa, 255.0f );
+    float32x4_t chf = vmlaq_n_f32( vmlaq_n_f32( fd, fb, 255.0f ), fa, -425.0f );
+    float32x4_t cvf = vmlaq_n_f32( vmlaq_n_f32( fd, fb, -425.0f ), fa, 255.0f );
+
+    int32x4_t coi = vcvtq_s32_f32( cof );
+    int32x4_t chi = vcvtq_s32_f32( chf );
+    int32x4_t cvi = vcvtq_s32_f32( cvf );
+
+    int32x4x2_t tr_hv = vtrnq_s32( chi, cvi );
+    int32x4x2_t tr_o = vtrnq_s32( coi, coi );
+
+    int16x8_t c_hvoo_br_6 = convert6_NEON( tr_hv.val[0], tr_o.val[0] );
+    int16x4_t c_hvox_g_7 = convert7_NEON( vcombine_s32( vget_low_s32( tr_hv.val[1] ), vget_low_s32( tr_o.val[1] ) ) );
+    int16x8_t c_hvoo_br_8 = vorrq_s16( vshrq_n_s16( c_hvoo_br_6, 4 ), vshlq_n_s16( c_hvoo_br_6, 2 ) );
+    int16x4_t c_hvox_g_8 = vorr_s16( vshr_n_s16( c_hvox_g_7, 6 ), vshl_n_s16( c_hvox_g_7, 1 ) );
+
+    int16x4_t rec_gxbr_o = vext_s16( c_hvox_g_8, vget_high_s16( c_hvoo_br_8 ), 3 );
+
+    rec_gxbr_o = vadd_s16( vshl_n_s16( rec_gxbr_o, 2 ), vdup_n_s16( 2 ) );
+    int16x8_t rec_ro_wide = vdupq_lane_s16( rec_gxbr_o, 3 );
+    int16x8_t rec_go_wide = vdupq_lane_s16( rec_gxbr_o, 0 );
+    int16x8_t rec_bo_wide = vdupq_lane_s16( rec_gxbr_o, 1 );
+
+    int16x4_t br_hv2 = vsub_s16( vget_low_s16( c_hvoo_br_8 ), vget_high_s16( c_hvoo_br_8 ) );
+    int16x4_t gg_hv2 = vsub_s16( c_hvox_g_8, vdup_lane_s16( c_hvox_g_8, 2 ) );
+
+    int16x8_t scaleh_lo = { 0, 0, 0, 0, 1, 1, 1, 1 };
+    int16x8_t scaleh_hi = { 2, 2, 2, 2, 3, 3, 3, 3 };
+    int16x8_t scalev = { 0, 1, 2, 3, 0, 1, 2, 3 };
+
+    int16x8_t rec_r_1 = vmlaq_lane_s16( rec_ro_wide, scalev, br_hv2, 3 );
+    int16x8_t rec_r_lo = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_r_1, scaleh_lo, br_hv2, 2 ), 2 ) ) );
+    int16x8_t rec_r_hi = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_r_1, scaleh_hi, br_hv2, 2 ), 2 ) ) );
+
+    int16x8_t rec_b_1 = vmlaq_lane_s16( rec_bo_wide, scalev, br_hv2, 1 );
+    int16x8_t rec_b_lo = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_b_1, scaleh_lo, br_hv2, 0 ), 2 ) ) );
+    int16x8_t rec_b_hi = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_b_1, scaleh_hi, br_hv2, 0 ), 2 ) ) );
+
+    int16x8_t rec_g_1 = vmlaq_lane_s16( rec_go_wide, scalev, gg_hv2, 1 );
+    int16x8_t rec_g_lo = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_g_1, scaleh_lo, gg_hv2, 0 ), 2 ) ) );
+    int16x8_t rec_g_hi = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_g_1, scaleh_hi, gg_hv2, 0 ), 2 ) ) );
+
+    int16x8_t dif_r_lo = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_low_u8( srcBlock.val[2] ) ) ), rec_r_lo );
+    int16x8_t dif_r_hi = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_high_u8( srcBlock.val[2] ) ) ), rec_r_hi );
+
+    int16x8_t dif_g_lo = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_low_u8( srcBlock.val[1] ) ) ), rec_g_lo );
+    int16x8_t dif_g_hi = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_high_u8( srcBlock.val[1] ) ) ), rec_g_hi );
+
+    int16x8_t dif_b_lo = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_low_u8( srcBlock.val[0] ) ) ), rec_b_lo );
+    int16x8_t dif_b_hi = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_high_u8( srcBlock.val[0] ) ) ), rec_b_hi );
+
+    int16x8_t dif_lo = vmlaq_n_s16( vmlaq_n_s16( vmulq_n_s16( dif_r_lo, 38 ), dif_g_lo, 76 ), dif_b_lo, 14 );
+    int16x8_t dif_hi = vmlaq_n_s16( vmlaq_n_s16( vmulq_n_s16( dif_r_hi, 38 ), dif_g_hi, 76 ), dif_b_hi, 14 );
+
+    int16x4_t tmpDif = vget_low_s16( dif_lo );
+    int32x4_t difsq_0 = vmull_s16( tmpDif, tmpDif );
+    tmpDif = vget_high_s16( dif_lo );
+    int32x4_t difsq_1 = vmull_s16( tmpDif, tmpDif );
+    tmpDif = vget_low_s16( dif_hi );
+    int32x4_t difsq_2 = vmull_s16( tmpDif, tmpDif );
+    tmpDif = vget_high_s16( dif_hi );
+    int32x4_t difsq_3 = vmull_s16( tmpDif, tmpDif );
+
+    uint32x4_t difsq_5 = vaddq_u32( vreinterpretq_u32_s32( difsq_0 ), vreinterpretq_u32_s32( difsq_1 ) );
+    uint32x4_t difsq_6 = vaddq_u32( vreinterpretq_u32_s32( difsq_2 ), vreinterpretq_u32_s32( difsq_3) );
+
+    uint64x2_t difsq_7 = vaddl_u32( vget_low_u32( difsq_5 ), vget_high_u32( difsq_5 ) );
+    uint64x2_t difsq_8 = vaddl_u32( vget_low_u32( difsq_6 ), vget_high_u32( difsq_6 ) );
+
+    uint64x2_t difsq_9 = vaddq_u64( difsq_7, difsq_8 );
+
+#ifdef __aarch64__
+    uint64_t error = vaddvq_u64( difsq_9 );
+#else
+    uint64_t error = vgetq_lane_u64( difsq_9, 0 ) + vgetq_lane_u64( difsq_9, 1 );
+#endif
+
+    int32_t coR = c_hvoo_br_6[6];
+    int32_t coG = c_hvox_g_7[2];
+    int32_t coB = c_hvoo_br_6[4];
+
+    int32_t chR = c_hvoo_br_6[2];
+    int32_t chG = c_hvox_g_7[0];
+    int32_t chB = c_hvoo_br_6[0];
+
+    int32_t cvR = c_hvoo_br_6[3];
+    int32_t cvG = c_hvox_g_7[1];
+    int32_t cvB = c_hvoo_br_6[1];
+
+    uint32_t rgbv = cvB | ( cvG << 6 ) | ( cvR << 13 );
+    uint32_t rgbh = chB | ( chG << 6 ) | ( chR << 13 );
+    uint32_t hi = rgbv | ( ( rgbh & 0x1FFF ) << 19 );
+    uint32_t lo = ( chR & 0x1 ) | 0x2 | ( ( chR << 1 ) & 0x7C );
+    lo |= ( ( coB & 0x07 ) << 7 ) | ( ( coB & 0x18 ) << 8 ) | ( ( coB & 0x20 ) << 11 );
+    lo |= ( ( coG & 0x3F) << 17) | ( (coG & 0x40 ) << 18 );
+    lo |= coR << 25;
+
+    const auto idx = ( coR & 0x20 ) | ( ( coG & 0x20 ) >> 1 ) | ( ( coB & 0x1E ) >> 1 );
+
+    lo |= g_flags[idx];
+
+    uint64_t result = static_cast<uint32_t>( _bswap(lo) );
+    result |= static_cast<uint64_t>( static_cast<uint32_t>( _bswap( hi ) ) ) << 32;
+
+    return std::make_pair( result, error );
+}
+
+#endif
+
+template<class T, class S>
+static etcpak_force_inline uint64_t EncodeSelectors( uint64_t d, const T terr[2][8], const S tsel[16][8], const uint32_t* id, const uint64_t value, const uint64_t error)
+{
+    size_t tidx[2];
+    tidx[0] = GetLeastError( terr[0], 8 );
+    tidx[1] = GetLeastError( terr[1], 8 );
+
+    if ((terr[0][tidx[0]] + terr[1][tidx[1]]) >= error)
+    {
+        return value;
+    }
+
+    d |= tidx[0] << 26;
+    d |= tidx[1] << 29;
+    for( int i=0; i<16; i++ )
+    {
+        uint64_t t = tsel[i][tidx[id[i]%2]];
+        d |= ( t & 0x1 ) << ( i + 32 );
+        d |= ( t & 0x2 ) << ( i + 47 );
+    }
+
+    return FixByteOrder(d);
+}
+
+}
+
+static etcpak_force_inline uint64_t ProcessRGB( const uint8_t* src )
+{
+#ifdef __AVX2__
+    uint64_t d = CheckSolid_AVX2( src );
+    if( d != 0 ) return d;
+
+    alignas(32) v4i a[8];
+
+    __m128i err0 = PrepareAverages_AVX2( a, src );
+
+    // Get index of minimum error (err0)
+    __m128i err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(2, 3, 0, 1));
+    __m128i errMin0 = _mm_min_epu32(err0, err1);
+
+    __m128i errMin1 = _mm_shuffle_epi32(errMin0, _MM_SHUFFLE(1, 0, 3, 2));
+    __m128i errMin2 = _mm_min_epu32(errMin1, errMin0);
+
+    __m128i errMask = _mm_cmpeq_epi32(errMin2, err0);
+
+    uint32_t mask = _mm_movemask_epi8(errMask);
+
+    uint32_t idx = _bit_scan_forward(mask) >> 2;
+
+    d |= EncodeAverages_AVX2( a, idx );
+
+    alignas(32) uint32_t terr[2][8] = {};
+    alignas(32) uint32_t tsel[8];
+
+    if ((idx == 0) || (idx == 2))
+    {
+        FindBestFit_4x2_AVX2( terr, tsel, a, idx * 2, src );
+    }
+    else
+    {
+        FindBestFit_2x4_AVX2( terr, tsel, a, idx * 2, src );
+    }
+
+    return EncodeSelectors_AVX2( d, terr, tsel, (idx % 2) == 1 );
+#else
+    uint64_t d = CheckSolid( src );
+    if( d != 0 ) return d;
+
+    v4i a[8];
+    unsigned int err[4] = {};
+    PrepareAverages( a, src, err );
+    size_t idx = GetLeastError( err, 4 );
+    EncodeAverages( d, a, idx );
+
+#if ( defined __SSE4_1__ || defined __ARM_NEON ) && !defined REFERENCE_IMPLEMENTATION
+    uint32_t terr[2][8] = {};
+#else
+    uint64_t terr[2][8] = {};
+#endif
+    uint16_t tsel[16][8];
+    auto id = g_id[idx];
+    FindBestFit( terr, tsel, a, id, src );
+
+    return FixByteOrder( EncodeSelectors( d, terr, tsel, id ) );
+#endif
+}
+
+static etcpak_force_inline uint64_t ProcessRGB_ETC2( const uint8_t* src )
+{
+#ifdef __AVX2__
+    uint64_t d = CheckSolid_AVX2( src );
+    if( d != 0 ) return d;
+
+    auto plane = Planar_AVX2( src );
+
+    alignas(32) v4i a[8];
+
+    __m128i err0 = PrepareAverages_AVX2( a, plane.sum4 );
+
+    // Get index of minimum error (err0)
+    __m128i err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(2, 3, 0, 1));
+    __m128i errMin0 = _mm_min_epu32(err0, err1);
+
+    __m128i errMin1 = _mm_shuffle_epi32(errMin0, _MM_SHUFFLE(1, 0, 3, 2));
+    __m128i errMin2 = _mm_min_epu32(errMin1, errMin0);
+
+    __m128i errMask = _mm_cmpeq_epi32(errMin2, err0);
+
+    uint32_t mask = _mm_movemask_epi8(errMask);
+
+    size_t idx = _bit_scan_forward(mask) >> 2;
+
+    d = EncodeAverages_AVX2( a, idx );
+
+    alignas(32) uint32_t terr[2][8] = {};
+    alignas(32) uint32_t tsel[8];
+
+    if ((idx == 0) || (idx == 2))
+    {
+        FindBestFit_4x2_AVX2( terr, tsel, a, idx * 2, src );
+    }
+    else
+    {
+        FindBestFit_2x4_AVX2( terr, tsel, a, idx * 2, src );
+    }
+
+    return EncodeSelectors_AVX2( d, terr, tsel, (idx % 2) == 1, plane.plane, plane.error );
+#else
+    uint64_t d = CheckSolid( src );
+    if (d != 0) return d;
+
+#ifdef __ARM_NEON
+    auto result = Planar_NEON( src );
+#else
+    auto result = Planar( src );
+#endif
+
+    v4i a[8];
+    unsigned int err[4] = {};
+    PrepareAverages( a, src, err );
+    size_t idx = GetLeastError( err, 4 );
+    EncodeAverages( d, a, idx );
+
+#if ( defined __SSE4_1__ || defined __ARM_NEON ) && !defined REFERENCE_IMPLEMENTATION
+    uint32_t terr[2][8] = {};
+#else
+    uint64_t terr[2][8] = {};
+#endif
+    uint16_t tsel[16][8];
+    auto id = g_id[idx];
+    FindBestFit( terr, tsel, a, id, src );
+
+    return EncodeSelectors( d, terr, tsel, id, result.first, result.second );
+#endif
+}
+
+#ifdef __SSE4_1__
+template<int K>
+static etcpak_force_inline __m128i Widen( const __m128i src )
+{
+    static_assert( K >= 0 && K <= 7, "Index out of range" );
+
+    __m128i tmp;
+    switch( K )
+    {
+    case 0:
+        tmp = _mm_shufflelo_epi16( src, _MM_SHUFFLE( 0, 0, 0, 0 ) );
+        return _mm_shuffle_epi32( tmp, _MM_SHUFFLE( 0, 0, 0, 0 ) );
+    case 1:
+        tmp = _mm_shufflelo_epi16( src, _MM_SHUFFLE( 1, 1, 1, 1 ) );
+        return _mm_shuffle_epi32( tmp, _MM_SHUFFLE( 0, 0, 0, 0 ) );
+    case 2:
+        tmp = _mm_shufflelo_epi16( src, _MM_SHUFFLE( 2, 2, 2, 2 ) );
+        return _mm_shuffle_epi32( tmp, _MM_SHUFFLE( 0, 0, 0, 0 ) );
+    case 3:
+        tmp = _mm_shufflelo_epi16( src, _MM_SHUFFLE( 3, 3, 3, 3 ) );
+        return _mm_shuffle_epi32( tmp, _MM_SHUFFLE( 0, 0, 0, 0 ) );
+    case 4:
+        tmp = _mm_shufflehi_epi16( src, _MM_SHUFFLE( 0, 0, 0, 0 ) );
+        return _mm_shuffle_epi32( tmp, _MM_SHUFFLE( 2, 2, 2, 2 ) );
+    case 5:
+        tmp = _mm_shufflehi_epi16( src, _MM_SHUFFLE( 1, 1, 1, 1 ) );
+        return _mm_shuffle_epi32( tmp, _MM_SHUFFLE( 2, 2, 2, 2 ) );
+    case 6:
+        tmp = _mm_shufflehi_epi16( src, _MM_SHUFFLE( 2, 2, 2, 2 ) );
+        return _mm_shuffle_epi32( tmp, _MM_SHUFFLE( 2, 2, 2, 2 ) );
+    case 7:
+        tmp = _mm_shufflehi_epi16( src, _MM_SHUFFLE( 3, 3, 3, 3 ) );
+        return _mm_shuffle_epi32( tmp, _MM_SHUFFLE( 2, 2, 2, 2 ) );
+    }
+}
+
+static etcpak_force_inline int GetMulSel( int sel )
+{
+    switch( sel )
+    {
+    case 0:
+        return 0;
+    case 1:
+    case 2:
+    case 3:
+        return 1;
+    case 4:
+        return 2;
+    case 5:
+    case 6:
+    case 7:
+        return 3;
+    case 8:
+    case 9:
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+        return 4;
+    case 14:
+    case 15:
+        return 5;
+    }
+}
+
+#endif
+
+#ifdef __ARM_NEON
+
+static constexpr etcpak_force_inline int GetMulSel(int sel)
+{
+    return ( sel < 1 ) ? 0 : ( sel < 4 ) ? 1 : ( sel < 5 ) ? 2 : ( sel < 8 ) ? 3 : ( sel < 14 ) ? 4 : 5;
+}
+
+static constexpr int ClampConstant( int x, int min, int max )
+{
+    return x < min ? min : x > max ? max : x;
+}
+
+template <int Index>
+etcpak_force_inline static uint16x8_t ErrorProbe_EAC_NEON( uint8x8_t recVal, uint8x16_t alphaBlock )
+{
+    uint8x8_t srcValWide;
+#ifndef __aarch64__
+    if( Index < 8 )
+        srcValWide = vdup_lane_u8( vget_low_u8( alphaBlock ), ClampConstant( Index, 0, 8 ) );
+    else
+        srcValWide = vdup_lane_u8( vget_high_u8( alphaBlock ), ClampConstant( Index - 8, 0, 8 ) );
+#else
+    srcValWide = vdup_laneq_u8( alphaBlock, Index );
+#endif
+
+    uint8x8_t deltaVal = vabd_u8( srcValWide, recVal );
+    return vmull_u8( deltaVal, deltaVal );
+}
+
+etcpak_force_inline static uint16_t MinError_EAC_NEON( uint16x8_t errProbe )
+{
+#ifndef __aarch64__
+    uint16x4_t tmpErr = vpmin_u16( vget_low_u16( errProbe ), vget_high_u16( errProbe ) );
+    tmpErr = vpmin_u16( tmpErr, tmpErr );
+    return vpmin_u16( tmpErr, tmpErr )[0];
+#else
+    return vminvq_u16( errProbe );
+#endif
+}
+
+template <int Index>
+etcpak_force_inline static uint64_t MinErrorIndex_EAC_NEON( uint8x8_t recVal, uint8x16_t alphaBlock )
+{
+    uint16x8_t errProbe = ErrorProbe_EAC_NEON<Index>( recVal, alphaBlock );
+    uint16x8_t minErrMask = vceqq_u16( errProbe, vdupq_n_u16( MinError_EAC_NEON( errProbe ) ) );
+    uint64_t idx = __builtin_ctzll( vget_lane_u64( vreinterpret_u64_u8( vqmovn_u16( minErrMask ) ), 0 ) );
+    idx >>= 3;
+    idx <<= 45 - Index * 3;
+
+    return idx;
+}
+
+template <int Index>
+etcpak_force_inline static int16x8_t WidenMultiplier_EAC_NEON( int16x8_t multipliers )
+{
+    constexpr int Lane = GetMulSel( Index );
+#ifndef __aarch64__
+    if( Lane < 4 )
+        return vdupq_lane_s16( vget_low_s16( multipliers ), ClampConstant( Lane, 0, 4 ) );
+    else
+        return vdupq_lane_s16( vget_high_s16( multipliers ), ClampConstant( Lane - 4, 0, 4 ) );
+#else
+    return vdupq_laneq_s16( multipliers, Lane );
+#endif
+}
+
+#endif
+
+static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
+{
+#if defined __SSE4_1__
+    // Check solid
+    __m128i s = _mm_loadu_si128( (__m128i*)src );
+    __m128i solidCmp = _mm_set1_epi8( src[0] );
+    __m128i cmpRes = _mm_cmpeq_epi8( s, solidCmp );
+    if( _mm_testc_si128( cmpRes, _mm_set1_epi32( -1 ) ) )
+    {
+        return src[0];
+    }
+
+    // Calculate min, max
+    __m128i s1 = _mm_shuffle_epi32( s, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i max1 = _mm_max_epu8( s, s1 );
+    __m128i min1 = _mm_min_epu8( s, s1 );
+    __m128i smax2 = _mm_shuffle_epi32( max1, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i smin2 = _mm_shuffle_epi32( min1, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i max2 = _mm_max_epu8( max1, smax2 );
+    __m128i min2 = _mm_min_epu8( min1, smin2 );
+    __m128i smax3 = _mm_alignr_epi8( max2, max2, 2 );
+    __m128i smin3 = _mm_alignr_epi8( min2, min2, 2 );
+    __m128i max3 = _mm_max_epu8( max2, smax3 );
+    __m128i min3 = _mm_min_epu8( min2, smin3 );
+    __m128i smax4 = _mm_alignr_epi8( max3, max3, 1 );
+    __m128i smin4 = _mm_alignr_epi8( min3, min3, 1 );
+    __m128i max = _mm_max_epu8( max3, smax4 );
+    __m128i min = _mm_min_epu8( min3, smin4 );
+    __m128i max16 = _mm_unpacklo_epi8( max, _mm_setzero_si128() );
+    __m128i min16 = _mm_unpacklo_epi8( min, _mm_setzero_si128() );
+
+    // src range, mid
+    __m128i srcRange = _mm_sub_epi16( max16, min16 );
+    __m128i srcRangeHalf = _mm_srli_epi16( srcRange, 1 );
+    __m128i srcMid = _mm_add_epi16( min16, srcRangeHalf );
+
+    // multiplier
+    __m128i mul1 = _mm_mulhi_epi16( srcRange, g_alphaRange_SIMD );
+    __m128i mul = _mm_add_epi16( mul1, _mm_set1_epi16( 1 ) );
+
+    // wide source
+    __m128i s16_1 = _mm_shuffle_epi32( s, _MM_SHUFFLE( 3, 2, 3, 2 ) );
+    __m128i s16[2] = { _mm_unpacklo_epi8( s, _mm_setzero_si128() ), _mm_unpacklo_epi8( s16_1, _mm_setzero_si128() ) };
+
+    __m128i sr[16] = {
+        Widen<0>( s16[0] ),
+        Widen<1>( s16[0] ),
+        Widen<2>( s16[0] ),
+        Widen<3>( s16[0] ),
+        Widen<4>( s16[0] ),
+        Widen<5>( s16[0] ),
+        Widen<6>( s16[0] ),
+        Widen<7>( s16[0] ),
+        Widen<0>( s16[1] ),
+        Widen<1>( s16[1] ),
+        Widen<2>( s16[1] ),
+        Widen<3>( s16[1] ),
+        Widen<4>( s16[1] ),
+        Widen<5>( s16[1] ),
+        Widen<6>( s16[1] ),
+        Widen<7>( s16[1] )
+    };
+
+#ifdef __AVX2__
+    __m256i srcRangeWide = _mm256_broadcastsi128_si256( srcRange );
+    __m256i srcMidWide = _mm256_broadcastsi128_si256( srcMid );
+
+    __m256i mulWide1 = _mm256_mulhi_epi16( srcRangeWide, g_alphaRange_AVX );
+    __m256i mulWide = _mm256_add_epi16( mulWide1, _mm256_set1_epi16( 1 ) );
+
+    __m256i modMul[8] = {
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[0] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[0] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[1] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[1] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[2] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[2] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[3] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[3] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[4] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[4] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[5] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[5] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[6] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[6] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[7] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[7] ) ) ), _mm256_setzero_si256() ),
+    };
+
+    // find selector
+    __m256i mulErr = _mm256_setzero_si256();
+    for( int j=0; j<16; j++ )
+    {
+        __m256i s16Wide = _mm256_broadcastsi128_si256( sr[j] );
+        __m256i err1, err2;
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[0] );
+        __m256i localErr = _mm256_mullo_epi16( err1, err1 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[1] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[2] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[3] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[4] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[5] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[6] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[7] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        // note that this can overflow, but since we're looking for the smallest error, it shouldn't matter
+        mulErr = _mm256_adds_epu16( mulErr, localErr );
+    }
+    uint64_t minPos1 = _mm_cvtsi128_si64( _mm_minpos_epu16( _mm256_castsi256_si128( mulErr ) ) );
+    uint64_t minPos2 = _mm_cvtsi128_si64( _mm_minpos_epu16( _mm256_extracti128_si256( mulErr, 1 ) ) );
+    int sel = ( ( minPos1 & 0xFFFF ) < ( minPos2 & 0xFFFF ) ) ? ( minPos1 >> 16 ) : ( 8 + ( minPos2 >> 16 ) );
+
+    __m128i recVal16;
+    switch( sel )
+    {
+    case 0:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<0>( mul ), g_alpha_SIMD[0] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<0>( mul ), g_alpha_SIMD[0] ) ) ), _mm_setzero_si128() );
+        break;
+    case 1:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[1] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[1] ) ) ), _mm_setzero_si128() );
+        break;
+    case 2:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[2] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[2] ) ) ), _mm_setzero_si128() );
+        break;
+    case 3:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[3] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[3] ) ) ), _mm_setzero_si128() );
+        break;
+    case 4:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<2>( mul ), g_alpha_SIMD[4] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<2>( mul ), g_alpha_SIMD[4] ) ) ), _mm_setzero_si128() );
+        break;
+    case 5:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[5] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[5] ) ) ), _mm_setzero_si128() );
+        break;
+    case 6:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[6] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[6] ) ) ), _mm_setzero_si128() );
+        break;
+    case 7:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[7] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[7] ) ) ), _mm_setzero_si128() );
+        break;
+    case 8:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[8] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[8] ) ) ), _mm_setzero_si128() );
+        break;
+    case 9:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[9] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[9] ) ) ), _mm_setzero_si128() );
+        break;
+    case 10:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[10] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[10] ) ) ), _mm_setzero_si128() );
+        break;
+    case 11:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[11] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[11] ) ) ), _mm_setzero_si128() );
+        break;
+    case 12:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[12] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[12] ) ) ), _mm_setzero_si128() );
+        break;
+    case 13:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[13] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[13] ) ) ), _mm_setzero_si128() );
+        break;
+    case 14:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[14] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[14] ) ) ), _mm_setzero_si128() );
+        break;
+    case 15:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[15] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[15] ) ) ), _mm_setzero_si128() );
+        break;
+    default:
+        assert( false );
+        break;
+    }
+#else
+    // wide multiplier
+    __m128i rangeMul[16] = {
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<0>( mul ), g_alpha_SIMD[0] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<0>( mul ), g_alpha_SIMD[0] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[1] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[1] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[2] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[2] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[3] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[3] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<2>( mul ), g_alpha_SIMD[4] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<2>( mul ), g_alpha_SIMD[4] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[5] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[5] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[6] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[6] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[7] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[7] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[8] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[8] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[9] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[9] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[10] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[10] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[11] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[11] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[12] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[12] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[13] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[13] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[14] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[14] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[15] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[15] ) ) ), _mm_setzero_si128() )
+    };
+
+    // find selector
+    int err = std::numeric_limits<int>::max();
+    int sel;
+    for( int r=0; r<16; r++ )
+    {
+        __m128i err1, err2, minerr;
+        __m128i recVal16 = rangeMul[r];
+        int rangeErr;
+
+        err1 = _mm_sub_epi16( sr[0], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr = _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[1], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[2], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[3], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[4], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[5], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[6], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[7], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[8], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[9], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[10], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[11], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[12], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[13], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[14], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[15], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        if( rangeErr < err )
+        {
+            err = rangeErr;
+            sel = r;
+            if( err == 0 ) break;
+        }
+    }
+
+    __m128i recVal16 = rangeMul[sel];
+#endif
+
+    // find indices
+    __m128i err1, err2, minerr;
+    uint64_t idx = 0, tmp;
+
+    err1 = _mm_sub_epi16( sr[0], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 15*3;
+
+    err1 = _mm_sub_epi16( sr[1], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 14*3;
+
+    err1 = _mm_sub_epi16( sr[2], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 13*3;
+
+    err1 = _mm_sub_epi16( sr[3], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 12*3;
+
+    err1 = _mm_sub_epi16( sr[4], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 11*3;
+
+    err1 = _mm_sub_epi16( sr[5], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 10*3;
+
+    err1 = _mm_sub_epi16( sr[6], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 9*3;
+
+    err1 = _mm_sub_epi16( sr[7], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 8*3;
+
+    err1 = _mm_sub_epi16( sr[8], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 7*3;
+
+    err1 = _mm_sub_epi16( sr[9], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 6*3;
+
+    err1 = _mm_sub_epi16( sr[10], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 5*3;
+
+    err1 = _mm_sub_epi16( sr[11], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 4*3;
+
+    err1 = _mm_sub_epi16( sr[12], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 3*3;
+
+    err1 = _mm_sub_epi16( sr[13], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 2*3;
+
+    err1 = _mm_sub_epi16( sr[14], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 1*3;
+
+    err1 = _mm_sub_epi16( sr[15], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 0*3;
+
+    uint16_t rm[8];
+    _mm_storeu_si128( (__m128i*)rm, mul );
+    uint16_t sm = _mm_cvtsi128_si64( srcMid );
+
+    uint64_t d = ( uint64_t( sm ) << 56 ) |
+        ( uint64_t( rm[GetMulSel( sel )] ) << 52 ) |
+        ( uint64_t( sel ) << 48 ) |
+        idx;
+
+    return _bswap64( d );
+#elif defined __ARM_NEON
+
+    int16x8_t srcMidWide, multipliers;
+    int srcMid;
+    uint8x16_t srcAlphaBlock = vld1q_u8( src );
+    {
+        uint8_t ref = src[0];
+        uint8x16_t a0 = vdupq_n_u8( ref );
+        uint8x16_t r = vceqq_u8( srcAlphaBlock, a0 );
+        int64x2_t m = vreinterpretq_s64_u8( r );
+        if( m[0] == -1 && m[1] == -1 )
+            return ref;
+
+        // srcRange
+#ifdef __aarch64__
+        uint8_t min = vminvq_u8( srcAlphaBlock );
+        uint8_t max = vmaxvq_u8( srcAlphaBlock );
+        uint8_t srcRange = max - min;
+        multipliers = vqaddq_s16( vshrq_n_s16( vqdmulhq_n_s16( g_alphaRange_NEON, srcRange ), 1 ), vdupq_n_s16( 1 ) );
+        srcMid = min + srcRange / 2;
+        srcMidWide = vdupq_n_s16( srcMid );
+#else
+        uint8x8_t vmin = vpmin_u8( vget_low_u8( srcAlphaBlock ), vget_high_u8( srcAlphaBlock ) );
+        vmin = vpmin_u8( vmin, vmin );
+        vmin = vpmin_u8( vmin, vmin );
+        vmin = vpmin_u8( vmin, vmin );
+        uint8x8_t vmax = vpmax_u8( vget_low_u8( srcAlphaBlock ), vget_high_u8( srcAlphaBlock ) );
+        vmax = vpmax_u8( vmax, vmax );
+        vmax = vpmax_u8( vmax, vmax );
+        vmax = vpmax_u8( vmax, vmax );
+
+        int16x8_t srcRangeWide = vreinterpretq_s16_u16( vsubl_u8( vmax, vmin ) );
+        multipliers = vqaddq_s16( vshrq_n_s16( vqdmulhq_s16( g_alphaRange_NEON, srcRangeWide ), 1 ), vdupq_n_s16( 1 ) );
+        srcMidWide = vsraq_n_s16( vreinterpretq_s16_u16(vmovl_u8(vmin)), srcRangeWide, 1);
+        srcMid = vgetq_lane_s16( srcMidWide, 0 );
+#endif
+    }
+
+    // calculate reconstructed values
+#define EAC_APPLY_16X( m ) m( 0 ) m( 1 ) m( 2 ) m( 3 ) m( 4 ) m( 5 ) m( 6 ) m( 7 ) m( 8 ) m( 9 ) m( 10 ) m( 11 ) m( 12 ) m( 13 ) m( 14 ) m( 15 )
+
+#define EAC_RECONSTRUCT_VALUE( n ) vqmovun_s16( vmlaq_s16( srcMidWide, g_alpha_NEON[n], WidenMultiplier_EAC_NEON<n>( multipliers ) ) ),
+    uint8x8_t recVals[16] = { EAC_APPLY_16X( EAC_RECONSTRUCT_VALUE ) };
+
+    // find selector
+    int err = std::numeric_limits<int>::max();
+    int sel = 0;
+    for( int r = 0; r < 16; r++ )
+    {
+        uint8x8_t recVal = recVals[r];
+
+        int rangeErr = 0;
+#define EAC_ACCUMULATE_ERROR( n ) rangeErr += MinError_EAC_NEON( ErrorProbe_EAC_NEON<n>( recVal, srcAlphaBlock ) );
+        EAC_APPLY_16X( EAC_ACCUMULATE_ERROR )
+
+        if( rangeErr < err )
+        {
+            err = rangeErr;
+            sel = r;
+            if ( err == 0 ) break;
+        }
+    }
+
+    // combine results
+    uint64_t d = ( uint64_t( srcMid ) << 56 ) |
+        ( uint64_t( multipliers[GetMulSel( sel )] ) << 52 ) |
+        ( uint64_t( sel ) << 48);
+
+    // generate indices
+    uint8x8_t recVal = recVals[sel];
+#define EAC_INSERT_INDEX(n) d |= MinErrorIndex_EAC_NEON<n>( recVal, srcAlphaBlock );
+    EAC_APPLY_16X( EAC_INSERT_INDEX )
+
+    return _bswap64( d );
+
+#undef EAC_APPLY_16X
+#undef EAC_INSERT_INDEX
+#undef EAC_ACCUMULATE_ERROR
+#undef EAC_RECONSTRUCT_VALUE
+
+#else
+    {
+        bool solid = true;
+        const uint8_t* ptr = src + 1;
+        const uint8_t ref = *src;
+        for( int i=1; i<16; i++ )
+        {
+            if( ref != *ptr++ )
+            {
+                solid = false;
+                break;
+            }
+        }
+        if( solid )
+        {
+            return ref;
+        }
+    }
+
+    uint8_t min = src[0];
+    uint8_t max = src[0];
+    for( int i=1; i<16; i++ )
+    {
+        if( min > src[i] ) min = src[i];
+        else if( max < src[i] ) max = src[i];
+    }
+    int srcRange = max - min;
+    int srcMid = min + srcRange / 2;
+
+    uint8_t buf[16][16];
+    int err = std::numeric_limits<int>::max();
+    int sel;
+    int selmul;
+    for( int r=0; r<16; r++ )
+    {
+        int mul = ( ( srcRange * g_alphaRange[r] ) >> 16 ) + 1;
+
+        int rangeErr = 0;
+        for( int i=0; i<16; i++ )
+        {
+            const auto srcVal = src[i];
+
+            int idx = 0;
+            const auto modVal = g_alpha[r][0] * mul;
+            const auto recVal = clampu8( srcMid + modVal );
+            int localErr = sq( srcVal - recVal );
+
+            if( localErr != 0 )
+            {
+                for( int j=1; j<8; j++ )
+                {
+                    const auto modVal = g_alpha[r][j] * mul;
+                    const auto recVal = clampu8( srcMid + modVal );
+                    const auto errProbe = sq( srcVal - recVal );
+                    if( errProbe < localErr )
+                    {
+                        localErr = errProbe;
+                        idx = j;
+                    }
+                }
+            }
+
+            buf[r][i] = idx;
+            rangeErr += localErr;
+        }
+
+        if( rangeErr < err )
+        {
+            err = rangeErr;
+            sel = r;
+            selmul = mul;
+            if( err == 0 ) break;
+        }
+    }
+
+    uint64_t d = ( uint64_t( srcMid ) << 56 ) |
+        ( uint64_t( selmul ) << 52 ) |
+        ( uint64_t( sel ) << 48 );
+
+    int offset = 45;
+    auto ptr = buf[sel];
+    for( int i=0; i<16; i++ )
+    {
+        d |= uint64_t( *ptr++ ) << offset;
+        offset -= 3;
+    }
+
+    return _bswap64( d );
+#endif
+}
+
+
+void CompressEtc1Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    int w = 0;
+    uint32_t buf[4*4];
+    do
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        __m128i c0 = _mm_castps_si128( px0 );
+        __m128i c1 = _mm_castps_si128( px1 );
+        __m128i c2 = _mm_castps_si128( px2 );
+        __m128i c3 = _mm_castps_si128( px3 );
+
+        __m128i mask = _mm_setr_epi32( 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f );
+        __m128i p0 = _mm_shuffle_epi8( c0, mask );
+        __m128i p1 = _mm_shuffle_epi8( c1, mask );
+        __m128i p2 = _mm_shuffle_epi8( c2, mask );
+        __m128i p3 = _mm_shuffle_epi8( c3, mask );
+
+        _mm_store_si128( (__m128i*)(buf + 0),  p0 );
+        _mm_store_si128( (__m128i*)(buf + 4),  p1 );
+        _mm_store_si128( (__m128i*)(buf + 8),  p2 );
+        _mm_store_si128( (__m128i*)(buf + 12), p3 );
+
+        src += 4;
+#else
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            unsigned int a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src -= width * 3 - 1;
+        }
+#endif
+        if( ++w == width/4 )
+        {
+            src += width * 3;
+            w = 0;
+        }
+        *dst++ = ProcessRGB( (uint8_t*)buf );
+    }
+    while( --blocks );
+}
+
+void CompressEtc2Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    int w = 0;
+    uint32_t buf[4*4];
+    do
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        __m128i c0 = _mm_castps_si128( px0 );
+        __m128i c1 = _mm_castps_si128( px1 );
+        __m128i c2 = _mm_castps_si128( px2 );
+        __m128i c3 = _mm_castps_si128( px3 );
+
+        __m128i mask = _mm_setr_epi32( 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f );
+        __m128i p0 = _mm_shuffle_epi8( c0, mask );
+        __m128i p1 = _mm_shuffle_epi8( c1, mask );
+        __m128i p2 = _mm_shuffle_epi8( c2, mask );
+        __m128i p3 = _mm_shuffle_epi8( c3, mask );
+
+        _mm_store_si128( (__m128i*)(buf + 0),  p0 );
+        _mm_store_si128( (__m128i*)(buf + 4),  p1 );
+        _mm_store_si128( (__m128i*)(buf + 8),  p2 );
+        _mm_store_si128( (__m128i*)(buf + 12), p3 );
+
+        src += 4;
+#else
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            unsigned int a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src -= width * 3 - 1;
+        }
+#endif
+        if( ++w == width/4 )
+        {
+            src += width * 3;
+            w = 0;
+        }
+        *dst++ = ProcessRGB_ETC2( (uint8_t*)buf );
+    }
+    while( --blocks );
+}
+
+#include <chrono>
+#include <thread>
+
+void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    int w = 0;
+    uint32_t buf[4*4];
+    do
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        _mm_store_si128( (__m128i*)(buf + 0),  _mm_castps_si128( px0 ) );
+        _mm_store_si128( (__m128i*)(buf + 4),  _mm_castps_si128( px1 ) );
+        _mm_store_si128( (__m128i*)(buf + 8),  _mm_castps_si128( px2 ) );
+        _mm_store_si128( (__m128i*)(buf + 12), _mm_castps_si128( px3 ) );
+
+        src += 4;
+#else
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src -= width * 3 - 1;
+        }
+#endif
+        if( ++w == width/4 )
+        {
+            src += width * 3;
+            w = 0;
+        }
+        *dst++ = ProcessRGB( (uint8_t*)buf );
+    }
+    while( --blocks );
+}
+
+void CompressEtc1RgbDither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    int w = 0;
+    uint32_t buf[4*4];
+    do
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+#  ifdef __AVX2__
+        DitherAvx2( (uint8_t*)buf, _mm_castps_si128( px0 ), _mm_castps_si128( px1 ), _mm_castps_si128( px2 ), _mm_castps_si128( px3 ) );
+#  else
+        _mm_store_si128( (__m128i*)(buf + 0),  _mm_castps_si128( px0 ) );
+        _mm_store_si128( (__m128i*)(buf + 4),  _mm_castps_si128( px1 ) );
+        _mm_store_si128( (__m128i*)(buf + 8),  _mm_castps_si128( px2 ) );
+        _mm_store_si128( (__m128i*)(buf + 12), _mm_castps_si128( px3 ) );
+
+        Dither( (uint8_t*)buf );
+#  endif
+
+        src += 4;
+#else
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src -= width * 3 - 1;
+        }
+#endif
+        if( ++w == width/4 )
+        {
+            src += width * 3;
+            w = 0;
+        }
+        *dst++ = ProcessRGB( (uint8_t*)buf );
+    }
+    while( --blocks );
+}
+
+void CompressEtc2Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    int w = 0;
+    uint32_t buf[4*4];
+    do
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        _mm_store_si128( (__m128i*)(buf + 0),  _mm_castps_si128( px0 ) );
+        _mm_store_si128( (__m128i*)(buf + 4),  _mm_castps_si128( px1 ) );
+        _mm_store_si128( (__m128i*)(buf + 8),  _mm_castps_si128( px2 ) );
+        _mm_store_si128( (__m128i*)(buf + 12), _mm_castps_si128( px3 ) );
+
+        src += 4;
+#else
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src -= width * 3 - 1;
+        }
+#endif
+        if( ++w == width/4 )
+        {
+            src += width * 3;
+            w = 0;
+        }
+        *dst++ = ProcessRGB_ETC2( (uint8_t*)buf );
+    }
+    while( --blocks );
+}
+
+void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    int w = 0;
+    uint32_t rgba[4*4];
+    uint8_t alpha[4*4];
+    do
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        __m128i c0 = _mm_castps_si128( px0 );
+        __m128i c1 = _mm_castps_si128( px1 );
+        __m128i c2 = _mm_castps_si128( px2 );
+        __m128i c3 = _mm_castps_si128( px3 );
+
+        _mm_store_si128( (__m128i*)(rgba + 0),  c0 );
+        _mm_store_si128( (__m128i*)(rgba + 4),  c1 );
+        _mm_store_si128( (__m128i*)(rgba + 8),  c2 );
+        _mm_store_si128( (__m128i*)(rgba + 12), c3 );
+
+        __m128i mask = _mm_setr_epi32( 0x0f0b0703, -1, -1, -1 );
+
+        __m128i a0 = _mm_shuffle_epi8( c0, mask );
+        __m128i a1 = _mm_shuffle_epi8( c1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
+        __m128i a2 = _mm_shuffle_epi8( c2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
+        __m128i a3 = _mm_shuffle_epi8( c3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
+
+        __m128i s0 = _mm_or_si128( a0, a1 );
+        __m128i s1 = _mm_or_si128( a2, a3 );
+        __m128i s2 = _mm_or_si128( s0, s1 );
+
+        _mm_store_si128( (__m128i*)alpha, s2 );
+
+        src += 4;
+#else
+        auto ptr = rgba;
+        auto ptr8 = alpha;
+        for( int x=0; x<4; x++ )
+        {
+            auto v = *src;
+            *ptr++ = v;
+            *ptr8++ = v >> 24;
+            src += width;
+            v = *src;
+            *ptr++ = v;
+            *ptr8++ = v >> 24;
+            src += width;
+            v = *src;
+            *ptr++ = v;
+            *ptr8++ = v >> 24;
+            src += width;
+            v = *src;
+            *ptr++ = v;
+            *ptr8++ = v >> 24;
+            src -= width * 3 - 1;
+        }
+#endif
+        if( ++w == width/4 )
+        {
+            src += width * 3;
+            w = 0;
+        }
+        *dst++ = ProcessAlpha_ETC2( alpha );
+        *dst++ = ProcessRGB_ETC2( (uint8_t*)rgba );
+    }
+    while( --blocks );
+}
diff --git a/thirdparty/etcpak/ProcessRGB.hpp b/thirdparty/etcpak/ProcessRGB.hpp
new file mode 100644
index 0000000000..c5555a5bb1
--- /dev/null
+++ b/thirdparty/etcpak/ProcessRGB.hpp
@@ -0,0 +1,13 @@
+#ifndef __PROCESSRGB_HPP__
+#define __PROCESSRGB_HPP__
+
+#include <stdint.h>
+
+void CompressEtc1Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+void CompressEtc2Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+void CompressEtc1RgbDither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+void CompressEtc2Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+
+#endif
diff --git a/thirdparty/etcpak/Semaphore.hpp b/thirdparty/etcpak/Semaphore.hpp
new file mode 100644
index 0000000000..9e42dbb9e0
--- /dev/null
+++ b/thirdparty/etcpak/Semaphore.hpp
@@ -0,0 +1,46 @@
+#ifndef __DARKRL__SEMAPHORE_HPP__
+#define __DARKRL__SEMAPHORE_HPP__
+
+#include <condition_variable>
+#include <mutex>
+
+class Semaphore
+{
+public:
+    Semaphore( int count ) : m_count( count ) {}
+
+    void lock()
+    {
+        std::unique_lock<std::mutex> lock( m_mutex );
+        m_cv.wait( lock, [this](){ return m_count != 0; } );
+        m_count--;
+    }
+
+    void unlock()
+    {
+        std::lock_guard<std::mutex> lock( m_mutex );
+        m_count++;
+        m_cv.notify_one();
+    }
+
+    bool try_lock()
+    {
+        std::lock_guard<std::mutex> lock( m_mutex );
+        if( m_count == 0 )
+        {
+            return false;
+        }
+        else
+        {
+            m_count--;
+            return true;
+        }
+    }
+
+private:
+    std::mutex m_mutex;
+    std::condition_variable m_cv;
+    unsigned int m_count;
+};
+
+#endif
diff --git a/thirdparty/etcpak/System.cpp b/thirdparty/etcpak/System.cpp
new file mode 100644
index 0000000000..041f2676e8
--- /dev/null
+++ b/thirdparty/etcpak/System.cpp
@@ -0,0 +1,65 @@
+#include <algorithm>
+#ifdef _WIN32
+#  include <windows.h>
+#else
+#  include <unistd.h>
+#endif
+
+#include "System.hpp"
+
+unsigned int System::CPUCores()
+{
+    static unsigned int cores = 0;
+    if( cores == 0 )
+    {
+        int tmp;
+#ifdef _WIN32
+        SYSTEM_INFO info;
+        GetSystemInfo( &info );
+        tmp = (int)info.dwNumberOfProcessors;
+#else
+#  ifndef _SC_NPROCESSORS_ONLN
+#    ifdef _SC_NPROC_ONLN
+#      define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN
+#    elif defined _SC_CRAY_NCPU
+#      define _SC_NPROCESSORS_ONLN _SC_CRAY_NCPU
+#    endif
+#  endif
+        tmp = (int)(long)sysconf( _SC_NPROCESSORS_ONLN );
+#endif
+        cores = (unsigned int)std::max( tmp, 1 );
+    }
+    return cores;
+}
+
+void System::SetThreadName( std::thread& thread, const char* name )
+{
+#ifdef _MSC_VER
+    const DWORD MS_VC_EXCEPTION=0x406D1388;
+
+#  pragma pack( push, 8 )
+    struct THREADNAME_INFO
+    {
+       DWORD dwType;
+       LPCSTR szName;
+       DWORD dwThreadID;
+       DWORD dwFlags;
+    };
+#  pragma pack(pop)
+
+    DWORD ThreadId = GetThreadId( static_cast<HANDLE>( thread.native_handle() ) );
+    THREADNAME_INFO info;
+    info.dwType = 0x1000;
+    info.szName = name;
+    info.dwThreadID = ThreadId;
+    info.dwFlags = 0;
+
+    __try
+    {
+       RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
+    }
+    __except(EXCEPTION_EXECUTE_HANDLER)
+    {
+    }
+#endif
+}
diff --git a/thirdparty/etcpak/System.hpp b/thirdparty/etcpak/System.hpp
new file mode 100644
index 0000000000..1a09bb15e1
--- /dev/null
+++ b/thirdparty/etcpak/System.hpp
@@ -0,0 +1,15 @@
+#ifndef __DARKRL__SYSTEM_HPP__
+#define __DARKRL__SYSTEM_HPP__
+
+#include <thread>
+
+class System
+{
+public:
+    System() = delete;
+
+    static unsigned int CPUCores();
+    static void SetThreadName( std::thread& thread, const char* name );
+};
+
+#endif
diff --git a/thirdparty/etcpak/Tables.cpp b/thirdparty/etcpak/Tables.cpp
new file mode 100644
index 0000000000..5c7fd9cf61
--- /dev/null
+++ b/thirdparty/etcpak/Tables.cpp
@@ -0,0 +1,221 @@
+#include "Tables.hpp"
+
+const int32_t g_table[8][4] = {
+    {  2,  8,   -2,   -8 },
+    {  5, 17,   -5,  -17 },
+    {  9, 29,   -9,  -29 },
+    { 13, 42,  -13,  -42 },
+    { 18, 60,  -18,  -60 },
+    { 24, 80,  -24,  -80 },
+    { 33, 106, -33, -106 },
+    { 47, 183, -47, -183 }
+};
+
+const int64_t g_table256[8][4] = {
+    {  2*256,  8*256,   -2*256,   -8*256 },
+    {  5*256, 17*256,   -5*256,  -17*256 },
+    {  9*256, 29*256,   -9*256,  -29*256 },
+    { 13*256, 42*256,  -13*256,  -42*256 },
+    { 18*256, 60*256,  -18*256,  -60*256 },
+    { 24*256, 80*256,  -24*256,  -80*256 },
+    { 33*256, 106*256, -33*256, -106*256 },
+    { 47*256, 183*256, -47*256, -183*256 }
+};
+
+const uint32_t g_id[4][16] = {
+    { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+    { 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2 },
+    { 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 },
+    { 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6 }
+};
+
+const uint32_t g_avg2[16] = {
+    0x00,
+    0x11,
+    0x22,
+    0x33,
+    0x44,
+    0x55,
+    0x66,
+    0x77,
+    0x88,
+    0x99,
+    0xAA,
+    0xBB,
+    0xCC,
+    0xDD,
+    0xEE,
+    0xFF
+};
+
+const uint32_t g_flags[64] = {
+    0x80800402, 0x80800402, 0x80800402, 0x80800402,
+    0x80800402, 0x80800402, 0x80800402, 0x8080E002,
+    0x80800402, 0x80800402, 0x8080E002, 0x8080E002,
+    0x80800402, 0x8080E002, 0x8080E002, 0x8080E002,
+    0x80000402, 0x80000402, 0x80000402, 0x80000402,
+    0x80000402, 0x80000402, 0x80000402, 0x8000E002,
+    0x80000402, 0x80000402, 0x8000E002, 0x8000E002,
+    0x80000402, 0x8000E002, 0x8000E002, 0x8000E002,
+    0x00800402, 0x00800402, 0x00800402, 0x00800402,
+    0x00800402, 0x00800402, 0x00800402, 0x0080E002,
+    0x00800402, 0x00800402, 0x0080E002, 0x0080E002,
+    0x00800402, 0x0080E002, 0x0080E002, 0x0080E002,
+    0x00000402, 0x00000402, 0x00000402, 0x00000402,
+    0x00000402, 0x00000402, 0x00000402, 0x0000E002,
+    0x00000402, 0x00000402, 0x0000E002, 0x0000E002,
+    0x00000402, 0x0000E002, 0x0000E002, 0x0000E002
+};
+
+const int32_t g_alpha[16][8] = {
+    { -3, -6,  -9, -15, 2, 5, 8, 14 },
+    { -3, -7, -10, -13, 2, 6, 9, 12 },
+    { -2, -5,  -8, -13, 1, 4, 7, 12 },
+    { -2, -4,  -6, -13, 1, 3, 5, 12 },
+    { -3, -6,  -8, -12, 2, 5, 7, 11 },
+    { -3, -7,  -9, -11, 2, 6, 8, 10 },
+    { -4, -7,  -8, -11, 3, 6, 7, 10 },
+    { -3, -5,  -8, -11, 2, 4, 7, 10 },
+    { -2, -6,  -8, -10, 1, 5, 7,  9 },
+    { -2, -5,  -8, -10, 1, 4, 7,  9 },
+    { -2, -4,  -8, -10, 1, 3, 7,  9 },
+    { -2, -5,  -7, -10, 1, 4, 6,  9 },
+    { -3, -4,  -7, -10, 2, 3, 6,  9 },
+    { -1, -2,  -3, -10, 0, 1, 2,  9 },
+    { -4, -6,  -8,  -9, 3, 5, 7,  8 },
+    { -3, -5,  -7,  -9, 2, 4, 6,  8 }
+};
+
+const int32_t g_alphaRange[16] = {
+    0x100FF / ( 1 + g_alpha[0][7] - g_alpha[0][3] ),
+    0x100FF / ( 1 + g_alpha[1][7] - g_alpha[1][3] ),
+    0x100FF / ( 1 + g_alpha[2][7] - g_alpha[2][3] ),
+    0x100FF / ( 1 + g_alpha[3][7] - g_alpha[3][3] ),
+    0x100FF / ( 1 + g_alpha[4][7] - g_alpha[4][3] ),
+    0x100FF / ( 1 + g_alpha[5][7] - g_alpha[5][3] ),
+    0x100FF / ( 1 + g_alpha[6][7] - g_alpha[6][3] ),
+    0x100FF / ( 1 + g_alpha[7][7] - g_alpha[7][3] ),
+    0x100FF / ( 1 + g_alpha[8][7] - g_alpha[8][3] ),
+    0x100FF / ( 1 + g_alpha[9][7] - g_alpha[9][3] ),
+    0x100FF / ( 1 + g_alpha[10][7] - g_alpha[10][3] ),
+    0x100FF / ( 1 + g_alpha[11][7] - g_alpha[11][3] ),
+    0x100FF / ( 1 + g_alpha[12][7] - g_alpha[12][3] ),
+    0x100FF / ( 1 + g_alpha[13][7] - g_alpha[13][3] ),
+    0x100FF / ( 1 + g_alpha[14][7] - g_alpha[14][3] ),
+    0x100FF / ( 1 + g_alpha[15][7] - g_alpha[15][3] ),
+};
+
+#ifdef __SSE4_1__
+const __m128i g_table_SIMD[2] =
+{
+    _mm_setr_epi16(   2,   5,   9,  13,  18,  24,  33,  47),
+    _mm_setr_epi16(   8,  17,  29,  42,  60,  80, 106, 183)
+};
+const __m128i g_table128_SIMD[2] =
+{
+    _mm_setr_epi16(   2*128,   5*128,   9*128,  13*128,  18*128,  24*128,  33*128,  47*128),
+    _mm_setr_epi16(   8*128,  17*128,  29*128,  42*128,  60*128,  80*128, 106*128, 183*128)
+};
+const __m128i g_table256_SIMD[4] =
+{
+    _mm_setr_epi32(  2*256,   5*256,   9*256,  13*256),
+    _mm_setr_epi32(  8*256,  17*256,  29*256,  42*256),
+    _mm_setr_epi32( 18*256,  24*256,  33*256,  47*256),
+    _mm_setr_epi32( 60*256,  80*256, 106*256, 183*256)
+};
+
+const __m128i g_alpha_SIMD[16] = {
+    _mm_setr_epi16( g_alpha[ 0][0], g_alpha[ 0][1], g_alpha[ 0][2], g_alpha[ 0][3], g_alpha[ 0][4], g_alpha[ 0][5], g_alpha[ 0][6], g_alpha[ 0][7] ),
+    _mm_setr_epi16( g_alpha[ 1][0], g_alpha[ 1][1], g_alpha[ 1][2], g_alpha[ 1][3], g_alpha[ 1][4], g_alpha[ 1][5], g_alpha[ 1][6], g_alpha[ 1][7] ),
+    _mm_setr_epi16( g_alpha[ 2][0], g_alpha[ 2][1], g_alpha[ 2][2], g_alpha[ 2][3], g_alpha[ 2][4], g_alpha[ 2][5], g_alpha[ 2][6], g_alpha[ 2][7] ),
+    _mm_setr_epi16( g_alpha[ 3][0], g_alpha[ 3][1], g_alpha[ 3][2], g_alpha[ 3][3], g_alpha[ 3][4], g_alpha[ 3][5], g_alpha[ 3][6], g_alpha[ 3][7] ),
+    _mm_setr_epi16( g_alpha[ 4][0], g_alpha[ 4][1], g_alpha[ 4][2], g_alpha[ 4][3], g_alpha[ 4][4], g_alpha[ 4][5], g_alpha[ 4][6], g_alpha[ 4][7] ),
+    _mm_setr_epi16( g_alpha[ 5][0], g_alpha[ 5][1], g_alpha[ 5][2], g_alpha[ 5][3], g_alpha[ 5][4], g_alpha[ 5][5], g_alpha[ 5][6], g_alpha[ 5][7] ),
+    _mm_setr_epi16( g_alpha[ 6][0], g_alpha[ 6][1], g_alpha[ 6][2], g_alpha[ 6][3], g_alpha[ 6][4], g_alpha[ 6][5], g_alpha[ 6][6], g_alpha[ 6][7] ),
+    _mm_setr_epi16( g_alpha[ 7][0], g_alpha[ 7][1], g_alpha[ 7][2], g_alpha[ 7][3], g_alpha[ 7][4], g_alpha[ 7][5], g_alpha[ 7][6], g_alpha[ 7][7] ),
+    _mm_setr_epi16( g_alpha[ 8][0], g_alpha[ 8][1], g_alpha[ 8][2], g_alpha[ 8][3], g_alpha[ 8][4], g_alpha[ 8][5], g_alpha[ 8][6], g_alpha[ 8][7] ),
+    _mm_setr_epi16( g_alpha[ 9][0], g_alpha[ 9][1], g_alpha[ 9][2], g_alpha[ 9][3], g_alpha[ 9][4], g_alpha[ 9][5], g_alpha[ 9][6], g_alpha[ 9][7] ),
+    _mm_setr_epi16( g_alpha[10][0], g_alpha[10][1], g_alpha[10][2], g_alpha[10][3], g_alpha[10][4], g_alpha[10][5], g_alpha[10][6], g_alpha[10][7] ),
+    _mm_setr_epi16( g_alpha[11][0], g_alpha[11][1], g_alpha[11][2], g_alpha[11][3], g_alpha[11][4], g_alpha[11][5], g_alpha[11][6], g_alpha[11][7] ),
+    _mm_setr_epi16( g_alpha[12][0], g_alpha[12][1], g_alpha[12][2], g_alpha[12][3], g_alpha[12][4], g_alpha[12][5], g_alpha[12][6], g_alpha[12][7] ),
+    _mm_setr_epi16( g_alpha[13][0], g_alpha[13][1], g_alpha[13][2], g_alpha[13][3], g_alpha[13][4], g_alpha[13][5], g_alpha[13][6], g_alpha[13][7] ),
+    _mm_setr_epi16( g_alpha[14][0], g_alpha[14][1], g_alpha[14][2], g_alpha[14][3], g_alpha[14][4], g_alpha[14][5], g_alpha[14][6], g_alpha[14][7] ),
+    _mm_setr_epi16( g_alpha[15][0], g_alpha[15][1], g_alpha[15][2], g_alpha[15][3], g_alpha[15][4], g_alpha[15][5], g_alpha[15][6], g_alpha[15][7] ),
+};
+
+const __m128i g_alphaRange_SIMD = _mm_setr_epi16(
+    g_alphaRange[0],
+    g_alphaRange[1],
+    g_alphaRange[4],
+    g_alphaRange[5],
+    g_alphaRange[8],
+    g_alphaRange[14],
+    0,
+    0 );
+#endif
+
+#ifdef __AVX2__
+const __m256i g_alpha_AVX[8] = {
+    _mm256_setr_epi16( g_alpha[ 0][0], g_alpha[ 1][0], g_alpha[ 2][0], g_alpha[ 3][0], g_alpha[ 4][0], g_alpha[ 5][0], g_alpha[ 6][0], g_alpha[ 7][0], g_alpha[ 8][0], g_alpha[ 9][0], g_alpha[10][0], g_alpha[11][0], g_alpha[12][0], g_alpha[13][0], g_alpha[14][0], g_alpha[15][0] ),
+    _mm256_setr_epi16( g_alpha[ 0][1], g_alpha[ 1][1], g_alpha[ 2][1], g_alpha[ 3][1], g_alpha[ 4][1], g_alpha[ 5][1], g_alpha[ 6][1], g_alpha[ 7][1], g_alpha[ 8][1], g_alpha[ 9][1], g_alpha[10][1], g_alpha[11][1], g_alpha[12][1], g_alpha[13][1], g_alpha[14][1], g_alpha[15][1] ),
+    _mm256_setr_epi16( g_alpha[ 0][2], g_alpha[ 1][2], g_alpha[ 2][2], g_alpha[ 3][2], g_alpha[ 4][2], g_alpha[ 5][2], g_alpha[ 6][2], g_alpha[ 7][2], g_alpha[ 8][2], g_alpha[ 9][2], g_alpha[10][2], g_alpha[11][2], g_alpha[12][2], g_alpha[13][2], g_alpha[14][2], g_alpha[15][2] ),
+    _mm256_setr_epi16( g_alpha[ 0][3], g_alpha[ 1][3], g_alpha[ 2][3], g_alpha[ 3][3], g_alpha[ 4][3], g_alpha[ 5][3], g_alpha[ 6][3], g_alpha[ 7][3], g_alpha[ 8][3], g_alpha[ 9][3], g_alpha[10][3], g_alpha[11][3], g_alpha[12][3], g_alpha[13][3], g_alpha[14][3], g_alpha[15][3] ),
+    _mm256_setr_epi16( g_alpha[ 0][4], g_alpha[ 1][4], g_alpha[ 2][4], g_alpha[ 3][4], g_alpha[ 4][4], g_alpha[ 5][4], g_alpha[ 6][4], g_alpha[ 7][4], g_alpha[ 8][4], g_alpha[ 9][4], g_alpha[10][4], g_alpha[11][4], g_alpha[12][4], g_alpha[13][4], g_alpha[14][4], g_alpha[15][4] ),
+    _mm256_setr_epi16( g_alpha[ 0][5], g_alpha[ 1][5], g_alpha[ 2][5], g_alpha[ 3][5], g_alpha[ 4][5], g_alpha[ 5][5], g_alpha[ 6][5], g_alpha[ 7][5], g_alpha[ 8][5], g_alpha[ 9][5], g_alpha[10][5], g_alpha[11][5], g_alpha[12][5], g_alpha[13][5], g_alpha[14][5], g_alpha[15][5] ),
+    _mm256_setr_epi16( g_alpha[ 0][6], g_alpha[ 1][6], g_alpha[ 2][6], g_alpha[ 3][6], g_alpha[ 4][6], g_alpha[ 5][6], g_alpha[ 6][6], g_alpha[ 7][6], g_alpha[ 8][6], g_alpha[ 9][6], g_alpha[10][6], g_alpha[11][6], g_alpha[12][6], g_alpha[13][6], g_alpha[14][6], g_alpha[15][6] ),
+    _mm256_setr_epi16( g_alpha[ 0][7], g_alpha[ 1][7], g_alpha[ 2][7], g_alpha[ 3][7], g_alpha[ 4][7], g_alpha[ 5][7], g_alpha[ 6][7], g_alpha[ 7][7], g_alpha[ 8][7], g_alpha[ 9][7], g_alpha[10][7], g_alpha[11][7], g_alpha[12][7], g_alpha[13][7], g_alpha[14][7], g_alpha[15][7] ),
+};
+
+const __m256i g_alphaRange_AVX = _mm256_setr_epi16(
+    g_alphaRange[ 0], g_alphaRange[ 1], g_alphaRange[ 2], g_alphaRange[ 3], g_alphaRange[ 4], g_alphaRange[ 5], g_alphaRange[ 6], g_alphaRange[ 7],
+    g_alphaRange[ 8], g_alphaRange[ 9], g_alphaRange[10], g_alphaRange[11], g_alphaRange[12], g_alphaRange[13], g_alphaRange[14], g_alphaRange[15]
+);
+#endif
+
+#ifdef __ARM_NEON
+const int16x8_t g_table128_NEON[2] =
+{
+    { 2*128,   5*128,   9*128,  13*128,  18*128,  24*128,  33*128,  47*128 },
+    { 8*128,  17*128,  29*128,  42*128,  60*128,  80*128, 106*128, 183*128 }
+};
+
+const int32x4_t g_table256_NEON[4] =
+{
+    {  2*256,   5*256,   9*256,  13*256 },
+    {  8*256,  17*256,  29*256,  42*256 },
+    { 18*256,  24*256,  33*256,  47*256 },
+    { 60*256,  80*256, 106*256, 183*256 }
+};
+
+const int16x8_t g_alpha_NEON[16] =
+{
+    { -3, -6,  -9, -15, 2, 5, 8, 14 },
+    { -3, -7, -10, -13, 2, 6, 9, 12 },
+    { -2, -5,  -8, -13, 1, 4, 7, 12 },
+    { -2, -4,  -6, -13, 1, 3, 5, 12 },
+    { -3, -6,  -8, -12, 2, 5, 7, 11 },
+    { -3, -7,  -9, -11, 2, 6, 8, 10 },
+    { -4, -7,  -8, -11, 3, 6, 7, 10 },
+    { -3, -5,  -8, -11, 2, 4, 7, 10 },
+    { -2, -6,  -8, -10, 1, 5, 7,  9 },
+    { -2, -5,  -8, -10, 1, 4, 7,  9 },
+    { -2, -4,  -8, -10, 1, 3, 7,  9 },
+    { -2, -5,  -7, -10, 1, 4, 6,  9 },
+    { -3, -4,  -7, -10, 2, 3, 6,  9 },
+    { -1, -2,  -3, -10, 0, 1, 2,  9 },
+    { -4, -6,  -8,  -9, 3, 5, 7,  8 },
+    { -3, -5,  -7,  -9, 2, 4, 6,  8 }
+};
+
+const int16x8_t g_alphaRange_NEON =
+{
+    (int16_t)g_alphaRange[0],
+    (int16_t)g_alphaRange[1],
+    (int16_t)g_alphaRange[4],
+    (int16_t)g_alphaRange[5],
+    (int16_t)g_alphaRange[8],
+    (int16_t)g_alphaRange[14],
+    0,
+    0
+};
+#endif
diff --git a/thirdparty/etcpak/Tables.hpp b/thirdparty/etcpak/Tables.hpp
new file mode 100644
index 0000000000..69d7e8aa07
--- /dev/null
+++ b/thirdparty/etcpak/Tables.hpp
@@ -0,0 +1,49 @@
+#ifndef __TABLES_HPP__
+#define __TABLES_HPP__
+
+#include <stdint.h>
+
+#ifdef __AVX2__
+#  include <immintrin.h>
+#endif
+#ifdef __SSE4_1__
+#  include <smmintrin.h>
+#endif
+#ifdef __ARM_NEON
+#  include <arm_neon.h>
+#endif
+
+extern const int32_t g_table[8][4];
+extern const int64_t g_table256[8][4];
+
+extern const uint32_t g_id[4][16];
+
+extern const uint32_t g_avg2[16];
+
+extern const uint32_t g_flags[64];
+
+extern const int32_t g_alpha[16][8];
+extern const int32_t g_alphaRange[16];
+
+#ifdef __SSE4_1__
+extern const __m128i g_table_SIMD[2];
+extern const __m128i g_table128_SIMD[2];
+extern const __m128i g_table256_SIMD[4];
+
+extern const __m128i g_alpha_SIMD[16];
+extern const __m128i g_alphaRange_SIMD;
+#endif
+
+#ifdef __AVX2__
+extern const __m256i g_alpha_AVX[8];
+extern const __m256i g_alphaRange_AVX;
+#endif
+
+#ifdef __ARM_NEON
+extern const int16x8_t g_table128_NEON[2];
+extern const int32x4_t g_table256_NEON[4];
+extern const int16x8_t g_alpha_NEON[16];
+extern const int16x8_t g_alphaRange_NEON;
+#endif
+
+#endif
diff --git a/thirdparty/etcpak/TaskDispatch.cpp b/thirdparty/etcpak/TaskDispatch.cpp
new file mode 100644
index 0000000000..b1ba17953b
--- /dev/null
+++ b/thirdparty/etcpak/TaskDispatch.cpp
@@ -0,0 +1,122 @@
+#include <assert.h>
+#include <stdio.h>
+#ifndef _MSC_VER
+#include <pthread.h>
+#endif
+
+#include "Debug.hpp"
+#include "System.hpp"
+#include "TaskDispatch.hpp"
+
+static TaskDispatch* s_instance = nullptr;
+
+TaskDispatch::TaskDispatch( size_t workers )
+    : m_exit( false )
+    , m_jobs( 0 )
+{
+    assert( !s_instance );
+    s_instance = this;
+
+    assert( workers >= 1 );
+    workers--;
+
+    m_workers.reserve( workers );
+    for( size_t i=0; i<workers; i++ )
+    {
+        char tmp[16];
+        sprintf( tmp, "Worker %zu", i );
+#ifdef _MSC_VER
+        auto worker = std::thread( [this]{ Worker(); } );
+        System::SetThreadName( worker, tmp );
+#else // Using pthread.
+        auto worker = std::thread( [this, tmp]{
+#ifdef __APPLE__
+            pthread_setname_np( tmp );
+#else // Linux or MinGW.
+            pthread_setname_np( pthread_self(), tmp );
+#endif
+            Worker();
+        } );
+#endif
+        m_workers.emplace_back( std::move( worker ) );
+    }
+
+    DBGPRINT( "Task dispatcher with " << m_workers.size() + 1 << " workers" );
+}
+
+TaskDispatch::~TaskDispatch()
+{
+    m_exit = true;
+    m_queueLock.lock();
+    m_cvWork.notify_all();
+    m_queueLock.unlock();
+
+    for( auto& worker : m_workers )
+    {
+        worker.join();
+    }
+
+    assert( s_instance );
+    s_instance = nullptr;
+}
+
+void TaskDispatch::Queue( const std::function<void(void)>& f )
+{
+    std::unique_lock<std::mutex> lock( s_instance->m_queueLock );
+    s_instance->m_queue.emplace_back( f );
+    const auto size = s_instance->m_queue.size();
+    lock.unlock();
+    if( size > 1 )
+    {
+        s_instance->m_cvWork.notify_one();
+    }
+}
+
+void TaskDispatch::Queue( std::function<void(void)>&& f )
+{
+    std::unique_lock<std::mutex> lock( s_instance->m_queueLock );
+    s_instance->m_queue.emplace_back( std::move( f ) );
+    const auto size = s_instance->m_queue.size();
+    lock.unlock();
+    if( size > 1 )
+    {
+        s_instance->m_cvWork.notify_one();
+    }
+}
+
+void TaskDispatch::Sync()
+{
+    std::unique_lock<std::mutex> lock( s_instance->m_queueLock );
+    while( !s_instance->m_queue.empty() )
+    {
+        auto f = s_instance->m_queue.back();
+        s_instance->m_queue.pop_back();
+        lock.unlock();
+        f();
+        lock.lock();
+    }
+    s_instance->m_cvJobs.wait( lock, []{ return s_instance->m_jobs == 0; } );
+}
+
+void TaskDispatch::Worker()
+{
+    for(;;)
+    {
+        std::unique_lock<std::mutex> lock( m_queueLock );
+        m_cvWork.wait( lock, [this]{ return !m_queue.empty() || m_exit; } );
+        if( m_exit ) return;
+        auto f = m_queue.back();
+        m_queue.pop_back();
+        m_jobs++;
+        lock.unlock();
+        f();
+        lock.lock();
+        m_jobs--;
+        bool notify = m_jobs == 0 && m_queue.empty();
+        lock.unlock();
+        if( notify )
+        {
+            m_cvJobs.notify_all();
+        }
+    }
+}
diff --git a/thirdparty/etcpak/TaskDispatch.hpp b/thirdparty/etcpak/TaskDispatch.hpp
new file mode 100644
index 0000000000..b513de4c0c
--- /dev/null
+++ b/thirdparty/etcpak/TaskDispatch.hpp
@@ -0,0 +1,34 @@
+#ifndef __DARKRL__TASKDISPATCH_HPP__
+#define __DARKRL__TASKDISPATCH_HPP__
+
+#include <atomic>
+#include <condition_variable>
+#include <functional>
+#include <mutex>
+#include <thread>
+#include <vector>
+
+class TaskDispatch
+{
+public:
+    TaskDispatch( size_t workers );
+    ~TaskDispatch();
+
+    static void Queue( const std::function<void(void)>& f );
+    static void Queue( std::function<void(void)>&& f );
+
+    static void Sync();
+
+private:
+    void Worker();
+
+    std::vector<std::function<void(void)>> m_queue;
+    std::mutex m_queueLock;
+    std::condition_variable m_cvWork, m_cvJobs;
+    std::atomic<bool> m_exit;
+    size_t m_jobs;
+
+    std::vector<std::thread> m_workers;
+};
+
+#endif
diff --git a/thirdparty/etcpak/Timing.cpp b/thirdparty/etcpak/Timing.cpp
new file mode 100644
index 0000000000..2af851f9a9
--- /dev/null
+++ b/thirdparty/etcpak/Timing.cpp
@@ -0,0 +1,8 @@
+#include <chrono>
+
+#include "Timing.hpp"
+
+uint64_t GetTime()
+{
+    return std::chrono::time_point_cast<std::chrono::microseconds>( std::chrono::high_resolution_clock::now() ).time_since_epoch().count();
+}
diff --git a/thirdparty/etcpak/Timing.hpp b/thirdparty/etcpak/Timing.hpp
new file mode 100644
index 0000000000..3767e20f24
--- /dev/null
+++ b/thirdparty/etcpak/Timing.hpp
@@ -0,0 +1,8 @@
+#ifndef __DARKRL__TIMING_HPP__
+#define __DARKRL__TIMING_HPP__
+
+#include <stdint.h>
+
+uint64_t GetTime();
+
+#endif
diff --git a/thirdparty/etcpak/Vector.hpp b/thirdparty/etcpak/Vector.hpp
new file mode 100644
index 0000000000..3370a88aea
--- /dev/null
+++ b/thirdparty/etcpak/Vector.hpp
@@ -0,0 +1,222 @@
+#ifndef __DARKRL__VECTOR_HPP__
+#define __DARKRL__VECTOR_HPP__
+
+#include <assert.h>
+#include <algorithm>
+#include <math.h>
+#include <stdint.h>
+
+#include "Math.hpp"
+
+template<class T>
+struct Vector2
+{
+    Vector2() : x( 0 ), y( 0 ) {}
+    Vector2( T v ) : x( v ), y( v ) {}
+    Vector2( T _x, T _y ) : x( _x ), y( _y ) {}
+
+    bool operator==( const Vector2<T>& rhs ) const { return x == rhs.x && y == rhs.y; }
+    bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
+
+    Vector2<T>& operator+=( const Vector2<T>& rhs )
+    {
+        x += rhs.x;
+        y += rhs.y;
+        return *this;
+    }
+    Vector2<T>& operator-=( const Vector2<T>& rhs )
+    {
+        x -= rhs.x;
+        y -= rhs.y;
+        return *this;
+    }
+    Vector2<T>& operator*=( const Vector2<T>& rhs )
+    {
+        x *= rhs.x;
+        y *= rhs.y;
+        return *this;
+    }
+
+    T x, y;
+};
+
+template<class T>
+Vector2<T> operator+( const Vector2<T>& lhs, const Vector2<T>& rhs )
+{
+    return Vector2<T>( lhs.x + rhs.x, lhs.y + rhs.y );
+}
+
+template<class T>
+Vector2<T> operator-( const Vector2<T>& lhs, const Vector2<T>& rhs )
+{
+    return Vector2<T>( lhs.x - rhs.x, lhs.y - rhs.y );
+}
+
+template<class T>
+Vector2<T> operator*( const Vector2<T>& lhs, const float& rhs )
+{
+    return Vector2<T>( lhs.x * rhs, lhs.y * rhs );
+}
+
+template<class T>
+Vector2<T> operator/( const Vector2<T>& lhs, const T& rhs )
+{
+    return Vector2<T>( lhs.x / rhs, lhs.y / rhs );
+}
+
+
+typedef Vector2<int32_t> v2i;
+typedef Vector2<float> v2f;
+
+
+template<class T>
+struct Vector3
+{
+    Vector3() : x( 0 ), y( 0 ), z( 0 ) {}
+    Vector3( T v ) : x( v ), y( v ), z( v ) {}
+    Vector3( T _x, T _y, T _z ) : x( _x ), y( _y ), z( _z ) {}
+    template<class Y>
+    Vector3( const Vector3<Y>& v ) : x( T( v.x ) ), y( T( v.y ) ), z( T( v.z ) ) {}
+
+    T Luminance() const { return T( x * 0.3f + y * 0.59f + z * 0.11f ); }
+    void Clamp()
+    {
+        x = std::min( T(1), std::max( T(0), x ) );
+        y = std::min( T(1), std::max( T(0), y ) );
+        z = std::min( T(1), std::max( T(0), z ) );
+    }
+
+    bool operator==( const Vector3<T>& rhs ) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
+    bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
+
+    T& operator[]( unsigned int idx ) { assert( idx < 3 ); return ((T*)this)[idx]; }
+    const T& operator[]( unsigned int idx ) const { assert( idx < 3 ); return ((T*)this)[idx]; }
+
+    Vector3<T> operator+=( const Vector3<T>& rhs )
+    {
+        x += rhs.x;
+        y += rhs.y;
+        z += rhs.z;
+        return *this;
+    }
+
+    Vector3<T> operator*=( const Vector3<T>& rhs )
+    {
+        x *= rhs.x;
+        y *= rhs.y;
+        z *= rhs.z;
+        return *this;
+    }
+
+    Vector3<T> operator*=( const float& rhs )
+    {
+        x *= rhs;
+        y *= rhs;
+        z *= rhs;
+        return *this;
+    }
+
+    T x, y, z;
+    T padding;
+};
+
+template<class T>
+Vector3<T> operator+( const Vector3<T>& lhs, const Vector3<T>& rhs )
+{
+    return Vector3<T>( lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z );
+}
+
+template<class T>
+Vector3<T> operator-( const Vector3<T>& lhs, const Vector3<T>& rhs )
+{
+    return Vector3<T>( lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z );
+}
+
+template<class T>
+Vector3<T> operator*( const Vector3<T>& lhs, const Vector3<T>& rhs )
+{
+    return Vector3<T>( lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z );
+}
+
+template<class T>
+Vector3<T> operator*( const Vector3<T>& lhs, const float& rhs )
+{
+    return Vector3<T>( T( lhs.x * rhs ), T( lhs.y * rhs ), T( lhs.z * rhs ) );
+}
+
+template<class T>
+Vector3<T> operator/( const Vector3<T>& lhs, const T& rhs )
+{
+    return Vector3<T>( lhs.x / rhs, lhs.y / rhs, lhs.z / rhs );
+}
+
+template<class T>
+bool operator<( const Vector3<T>& lhs, const Vector3<T>& rhs )
+{
+    return lhs.Luminance() < rhs.Luminance();
+}
+
+typedef Vector3<int32_t> v3i;
+typedef Vector3<float> v3f;
+typedef Vector3<uint8_t> v3b;
+
+
+static inline v3b v3f_to_v3b( const v3f& v )
+{
+    return v3b( uint8_t( std::min( 1.f, v.x ) * 255 ), uint8_t( std::min( 1.f, v.y ) * 255 ), uint8_t( std::min( 1.f, v.z ) * 255 ) );
+}
+
+template<class T>
+Vector3<T> Mix( const Vector3<T>& v1, const Vector3<T>& v2, float amount )
+{
+    return v1 + ( v2 - v1 ) * amount;
+}
+
+template<>
+inline v3b Mix( const v3b& v1, const v3b& v2, float amount )
+{
+    return v3b( v3f( v1 ) + ( v3f( v2 ) - v3f( v1 ) ) * amount );
+}
+
+template<class T>
+Vector3<T> Desaturate( const Vector3<T>& v )
+{
+    T l = v.Luminance();
+    return Vector3<T>( l, l, l );
+}
+
+template<class T>
+Vector3<T> Desaturate( const Vector3<T>& v, float mul )
+{
+    T l = T( v.Luminance() * mul );
+    return Vector3<T>( l, l, l );
+}
+
+template<class T>
+Vector3<T> pow( const Vector3<T>& base, float exponent )
+{
+    return Vector3<T>(
+        pow( base.x, exponent ),
+        pow( base.y, exponent ),
+        pow( base.z, exponent ) );
+}
+
+template<class T>
+Vector3<T> sRGB2linear( const Vector3<T>& v )
+{
+    return Vector3<T>(
+        sRGB2linear( v.x ),
+        sRGB2linear( v.y ),
+        sRGB2linear( v.z ) );
+}
+
+template<class T>
+Vector3<T> linear2sRGB( const Vector3<T>& v )
+{
+    return Vector3<T>(
+        linear2sRGB( v.x ),
+        linear2sRGB( v.y ),
+        linear2sRGB( v.z ) );
+}
+
+#endif
diff --git a/thirdparty/etcpak/lz4/lz4.c b/thirdparty/etcpak/lz4/lz4.c
new file mode 100644
index 0000000000..08cf6b5cd7
--- /dev/null
+++ b/thirdparty/etcpak/lz4/lz4.c
@@ -0,0 +1,1516 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+/**************************************
+*  Tuning parameters
+**************************************/
+/*
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#define HEAPMODE 0
+
+/*
+ * ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
+ */
+#define ACCELERATION_DEFAULT 1
+
+
+/**************************************
+*  CPU Feature Detection
+**************************************/
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
+#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for Windows CE does not support Hardware bit count */
+#  define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+
+/**************************************
+*  Includes
+**************************************/
+#include "lz4.h"
+
+
+/**************************************
+*  Compiler Options
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4293)        /* disable: C4293: too large shift (32-bits) */
+#else
+#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+#    if defined(__GNUC__) || defined(__clang__)
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif   /* __STDC_VERSION__ */
+#endif  /* _MSC_VER */
+
+/* LZ4_GCC_VERSION is defined into lz4.h */
+#if (LZ4_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
+#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
+#else
+#  define expect(expr,value)    (expr)
+#endif
+
+#define likely(expr)     expect((expr) != 0, 1)
+#define unlikely(expr)   expect((expr) != 0, 0)
+
+
+/**************************************
+*  Memory routines
+**************************************/
+#include <stdlib.h>   /* malloc, calloc, free */
+#define ALLOCATOR(n,s) calloc(n,s)
+#define FREEMEM        free
+#include <string.h>   /* memset, memcpy */
+#define MEM_INIT       memset
+
+
+/**************************************
+*  Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  Reading and writing into memory
+**************************************/
+#define STEPSIZE sizeof(size_t)
+
+static unsigned LZ4_64bits(void) { return sizeof(void*)==8; }
+
+static unsigned LZ4_isLittleEndian(void)
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+
+static U16 LZ4_read16(const void* memPtr)
+{
+    U16 val16;
+    memcpy(&val16, memPtr, 2);
+    return val16;
+}
+
+static U16 LZ4_readLE16(const void* memPtr)
+{
+    if (LZ4_isLittleEndian())
+    {
+        return LZ4_read16(memPtr);
+    }
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + (p[1]<<8));
+    }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)
+{
+    if (LZ4_isLittleEndian())
+    {
+        memcpy(memPtr, &value, 2);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE) value;
+        p[1] = (BYTE)(value>>8);
+    }
+}
+
+static U32 LZ4_read32(const void* memPtr)
+{
+    U32 val32;
+    memcpy(&val32, memPtr, 4);
+    return val32;
+}
+
+static U64 LZ4_read64(const void* memPtr)
+{
+    U64 val64;
+    memcpy(&val64, memPtr, 8);
+    return val64;
+}
+
+static size_t LZ4_read_ARCH(const void* p)
+{
+    if (LZ4_64bits())
+        return (size_t)LZ4_read64(p);
+    else
+        return (size_t)LZ4_read32(p);
+}
+
+
+static void LZ4_copy4(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 4); }
+
+static void LZ4_copy8(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 8); }
+
+/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */
+static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* e = (BYTE*)dstEnd;
+    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+}
+
+
+/**************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define COPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (COPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/**************************************
+*  Common Utils
+**************************************/
+#define LZ4_STATIC_ASSERT(c)    { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/**************************************
+*  Common functions
+**************************************/
+static unsigned LZ4_NbCommonBytes (register size_t val)
+{
+    if (LZ4_isLittleEndian())
+    {
+        if (LZ4_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (int)(r>>3);
+#       elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward( &r, (U32)val );
+            return (int)(r>>3);
+#       elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    }
+    else   /* Big Endian CPU */
+    {
+        if (LZ4_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+#       elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clzll((U64)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+#       elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+    }
+}
+
+static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+
+    while (likely(pIn<pInLimit-(STEPSIZE-1)))
+    {
+        size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+        pIn += LZ4_NbCommonBytes(diff);
+        return (unsigned)(pIn - pStart);
+    }
+
+    if (LZ4_64bits()) if ((pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (unsigned)(pIn - pStart);
+}
+
+
+#ifndef LZ4_COMMONDEFS_ONLY
+/**************************************
+*  Local Constants
+**************************************/
+#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
+#define HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
+
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
+
+
+/**************************************
+*  Local Structures and types
+**************************************/
+typedef struct {
+    U32 hashTable[HASH_SIZE_U32];
+    U32 currentOffset;
+    U32 initCheck;
+    const BYTE* dictionary;
+    BYTE* bufferStart;   /* obsolete, used for slideInputBuffer */
+    U32 dictSize;
+} LZ4_stream_t_internal;
+
+typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+
+/**************************************
+*  Local Utils
+**************************************/
+int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
+int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
+int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
+
+
+
+/********************************
+*  Compression functions
+********************************/
+
+static U32 LZ4_hashSequence(U32 sequence, tableType_t const tableType)
+{
+    if (tableType == byU16)
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
+    else
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+}
+
+static const U64 prime5bytes = 889523592379ULL;
+static U32 LZ4_hashSequence64(size_t sequence, tableType_t const tableType)
+{
+    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
+    const U32 hashMask = (1<<hashLog) - 1;
+    return ((sequence * prime5bytes) >> (40 - hashLog)) & hashMask;
+}
+
+static U32 LZ4_hashSequenceT(size_t sequence, tableType_t const tableType)
+{
+    if (LZ4_64bits())
+        return LZ4_hashSequence64(sequence, tableType);
+    return LZ4_hashSequence((U32)sequence, tableType);
+}
+
+static U32 LZ4_hashPosition(const void* p, tableType_t tableType) { return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); }
+
+static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
+    }
+}
+
+static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 h = LZ4_hashPosition(p, tableType);
+    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
+    { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+}
+
+static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 h = LZ4_hashPosition(p, tableType);
+    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+FORCE_INLINE int LZ4_compress_generic(
+                 void* const ctx,
+                 const char* const source,
+                 char* const dest,
+                 const int inputSize,
+                 const int maxOutputSize,
+                 const limitedOutput_directive outputLimited,
+                 const tableType_t tableType,
+                 const dict_directive dict,
+                 const dictIssue_directive dictIssue,
+                 const U32 acceleration)
+{
+    LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx;
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* base;
+    const BYTE* lowLimit;
+    const BYTE* const lowRefLimit = ip - dictPtr->dictSize;
+    const BYTE* const dictionary = dictPtr->dictionary;
+    const BYTE* const dictEnd = dictionary + dictPtr->dictSize;
+    const size_t dictDelta = dictEnd - (const BYTE*)source;
+    const BYTE* anchor = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const olimit = op + maxOutputSize;
+
+    U32 forwardH;
+    size_t refDelta=0;
+
+    /* Init conditions */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported input size, too large (or negative) */
+    switch(dict)
+    {
+    case noDict:
+    default:
+        base = (const BYTE*)source;
+        lowLimit = (const BYTE*)source;
+        break;
+    case withPrefix64k:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source - dictPtr->dictSize;
+        break;
+    case usingExtDict:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source;
+        break;
+    }
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (inputSize<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; )
+    {
+        const BYTE* match;
+        BYTE* token;
+        {
+            const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+
+            /* Find a match */
+            do {
+                U32 h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx, tableType, base);
+                if (dict==usingExtDict)
+                {
+                    if (match<(const BYTE*)source)
+                    {
+                        refDelta = dictDelta;
+                        lowLimit = dictionary;
+                    }
+                    else
+                    {
+                        refDelta = 0;
+                        lowLimit = (const BYTE*)source;
+                    }
+                }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+            } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
+                || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while ((ip>anchor) && (match+refDelta > lowLimit) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
+
+        {
+            /* Encode Literal length */
+            unsigned litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
+                return 0;   /* Check output limit */
+            if (litLength>=RUN_MASK)
+            {
+                int len = (int)litLength-RUN_MASK;
+                *token=(RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op+=litLength;
+        }
+
+_next_match:
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {
+            unsigned matchLength;
+
+            if ((dict==usingExtDict) && (lowLimit==dictionary))
+            {
+                const BYTE* limit;
+                match += refDelta;
+                limit = ip + (dictEnd-match);
+                if (limit > matchlimit) limit = matchlimit;
+                matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+                ip += MINMATCH + matchLength;
+                if (ip==limit)
+                {
+                    unsigned more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+                    matchLength += more;
+                    ip += more;
+                }
+            }
+            else
+            {
+                matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+                ip += MINMATCH + matchLength;
+            }
+
+            if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit)))
+                return 0;    /* Check output limit */
+            if (matchLength>=ML_MASK)
+            {
+                *token += ML_MASK;
+                matchLength -= ML_MASK;
+                for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; }
+                if (matchLength >= 255) { matchLength-=255; *op++ = 255; }
+                *op++ = (BYTE)matchLength;
+            }
+            else *token += (BYTE)(matchLength);
+        }
+
+        anchor = ip;
+
+        /* Test end of chunk */
+        if (ip > mflimit) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx, tableType, base);
+        if (dict==usingExtDict)
+        {
+            if (match<(const BYTE*)source)
+            {
+                refDelta = dictDelta;
+                lowLimit = dictionary;
+            }
+            else
+            {
+                refDelta = 0;
+                lowLimit = (const BYTE*)source;
+            }
+        }
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
+            && (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {
+        const size_t lastRun = (size_t)(iend - anchor);
+        if ((outputLimited) && ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize))
+            return 0;   /* Check output limit */
+        if (lastRun >= RUN_MASK)
+        {
+            size_t accumulator = lastRun - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        }
+        else
+        {
+            *op++ = (BYTE)(lastRun<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRun);
+        op += lastRun;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_resetStream((LZ4_stream_t*)state);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    if (maxOutputSize >= LZ4_compressBound(inputSize))
+    {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16,                        noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    }
+    else
+    {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    }
+}
+
+
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+#if (HEAPMODE)
+    void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+#else
+    LZ4_stream_t ctx;
+    void* ctxPtr = &ctx;
+#endif
+
+    int result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
+
+#if (HEAPMODE)
+    FREEMEM(ctxPtr);
+#endif
+    return result;
+}
+
+
+int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1);
+}
+
+
+/* hidden debug function */
+/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
+int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t ctx;
+
+    LZ4_resetStream(&ctx);
+
+    if (inputSize < LZ4_64Klimit)
+        return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
+    else
+        return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+}
+
+
+/********************************
+*  destSize variant
+********************************/
+
+static int LZ4_compress_destSize_generic(
+                       void* const ctx,
+                 const char* const src,
+                       char* const dst,
+                       int*  const srcSizePtr,
+                 const int targetDstSize,
+                 const tableType_t tableType)
+{
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* base = (const BYTE*) src;
+    const BYTE* lowLimit = (const BYTE*) src;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    BYTE* op = (BYTE*) dst;
+    BYTE* const oend = op + targetDstSize;
+    BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */;
+    BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */);
+    BYTE* const oMaxSeq = oMaxLit - 1 /* token */;
+
+    U32 forwardH;
+
+
+    /* Init conditions */
+    if (targetDstSize < 1) return 0;                                     /* Impossible to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;            /* Unsupported input size, too large (or negative) */
+    if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (*srcSizePtr<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    *srcSizePtr = 0;
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; )
+    {
+        const BYTE* match;
+        BYTE* token;
+        {
+            const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = 1 << LZ4_skipTrigger;
+
+            /* Find a match */
+            do {
+                U32 h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit))
+                    goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx, tableType, base);
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+            } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+        {
+            /* Encode Literal length */
+            unsigned litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if (op + ((litLength+240)/255) + litLength > oMaxLit)
+            {
+                /* Not enough space for a last match */
+                op--;
+                goto _last_literals;
+            }
+            if (litLength>=RUN_MASK)
+            {
+                unsigned len = litLength - RUN_MASK;
+                *token=(RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op += litLength;
+        }
+
+_next_match:
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {
+            size_t matchLength;
+
+            matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+
+            if (op + ((matchLength+240)/255) > oMaxMatch)
+            {
+                /* Match description too long : reduce it */
+                matchLength = (15-1) + (oMaxMatch-op) * 255;
+            }
+            //printf("offset %5i, matchLength%5i \n", (int)(ip-match), matchLength + MINMATCH);
+            ip += MINMATCH + matchLength;
+
+            if (matchLength>=ML_MASK)
+            {
+                *token += ML_MASK;
+                matchLength -= ML_MASK;
+                while (matchLength >= 255) { matchLength-=255; *op++ = 255; }
+                *op++ = (BYTE)matchLength;
+            }
+            else *token += (BYTE)(matchLength);
+        }
+
+        anchor = ip;
+
+        /* Test end of block */
+        if (ip > mflimit) break;
+        if (op > oMaxSeq) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx, tableType, base);
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ( (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {
+        size_t lastRunSize = (size_t)(iend - anchor);
+        if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend)
+        {
+            /* adapt lastRunSize to fill 'dst' */
+            lastRunSize  = (oend-op) - 1;
+            lastRunSize -= (lastRunSize+240)/255;
+        }
+        ip = anchor + lastRunSize;
+
+        if (lastRunSize >= RUN_MASK)
+        {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        }
+        else
+        {
+            *op++ = (BYTE)(lastRunSize<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
+    }
+
+    /* End */
+    *srcSizePtr = (int) (((const char*)ip)-src);
+    return (int) (((char*)op)-dst);
+}
+
+
+static int LZ4_compress_destSize_extState (void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    LZ4_resetStream((LZ4_stream_t*)state);
+
+    if (targetDstSize >= LZ4_compressBound(*srcSizePtr))   /* compression success is guaranteed */
+    {
+        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
+    }
+    else
+    {
+        if (*srcSizePtr < LZ4_64Klimit)
+            return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16);
+        else
+            return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, LZ4_64bits() ? byU32 : byPtr);
+    }
+}
+
+
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+#if (HEAPMODE)
+    void* ctx = ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+#else
+    LZ4_stream_t ctxBody;
+    void* ctx = &ctxBody;
+#endif
+
+    int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
+
+#if (HEAPMODE)
+    FREEMEM(ctx);
+#endif
+    return result;
+}
+
+
+
+/********************************
+*  Streaming functions
+********************************/
+
+LZ4_stream_t* LZ4_createStream(void)
+{
+    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+    LZ4_resetStream(lz4s);
+    return lz4s;
+}
+
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
+{
+    FREEMEM(LZ4_stream);
+    return (0);
+}
+
+
+#define HASH_UNIT sizeof(size_t)
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
+{
+    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
+    const BYTE* p = (const BYTE*)dictionary;
+    const BYTE* const dictEnd = p + dictSize;
+    const BYTE* base;
+
+    if ((dict->initCheck) || (dict->currentOffset > 1 GB))  /* Uninitialized structure, or reuse overflow */
+        LZ4_resetStream(LZ4_dict);
+
+    if (dictSize < (int)HASH_UNIT)
+    {
+        dict->dictionary = NULL;
+        dict->dictSize = 0;
+        return 0;
+    }
+
+    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
+    dict->currentOffset += 64 KB;
+    base = p - dict->currentOffset;
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->currentOffset += dict->dictSize;
+
+    while (p <= dictEnd-HASH_UNIT)
+    {
+        LZ4_putPosition(p, dict->hashTable, byU32, base);
+        p+=3;
+    }
+
+    return dict->dictSize;
+}
+
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+{
+    if ((LZ4_dict->currentOffset > 0x80000000) ||
+        ((size_t)LZ4_dict->currentOffset > (size_t)src))   /* address space overflow */
+    {
+        /* rescale hash table */
+        U32 delta = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int i;
+        for (i=0; i<HASH_SIZE_U32; i++)
+        {
+            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
+            else LZ4_dict->hashTable[i] -= delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+    }
+}
+
+
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = (const BYTE*) source;
+    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
+    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
+    LZ4_renormDictT(streamPtr, smallest);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    /* Check overlapping input/dictionary space */
+    {
+        const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd))
+        {
+            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
+            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
+            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+        }
+    }
+
+    /* prefix mode : source data follows dictionary */
+    if (dictEnd == (const BYTE*)source)
+    {
+        int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration);
+        streamPtr->dictSize += (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+
+    /* external dictionary mode */
+    {
+        int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration);
+        streamPtr->dictionary = (const BYTE*)source;
+        streamPtr->dictSize = (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+}
+
+
+/* Hidden debug function, to force external dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict;
+    int result;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = dictEnd;
+    if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
+    LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest);
+
+    result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+
+    streamPtr->dictionary = (const BYTE*)source;
+    streamPtr->dictSize = (U32)inputSize;
+    streamPtr->currentOffset += (U32)inputSize;
+
+    return result;
+}
+
+
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
+    const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;
+
+    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
+
+    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+    dict->dictionary = (const BYTE*)safeBuffer;
+    dict->dictSize = (U32)dictSize;
+
+    return dictSize;
+}
+
+
+
+/*******************************
+*  Decompression functions
+*******************************/
+/*
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is essential this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+FORCE_INLINE int LZ4_decompress_generic(
+                 const char* const source,
+                 char* const dest,
+                 int inputSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+
+                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
+                 int partialDecoding,    /* full, partial */
+                 int targetOutputSize,   /* only used if partialDecoding==partial */
+                 int dict,               /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* == dest if dict == noDict */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
+{
+    /* Local Variables */
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + outputSize;
+    BYTE* cpy;
+    BYTE* oexit = op + targetOutputSize;
+    const BYTE* const lowLimit = lowPrefix - dictSize;
+
+    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+    const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
+
+    const int safeDecode = (endOnInput==endOnInputSize);
+    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+
+
+    /* Special cases */
+    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;                         /* targetOutputSize too high => decode everything */
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+
+
+    /* Main Loop */
+    while (1)
+    {
+        unsigned token;
+        size_t length;
+        const BYTE* match;
+
+        /* get literal length */
+        token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK)
+        {
+            unsigned s;
+            do
+            {
+                s = *ip++;
+                length += s;
+            }
+            while (likely((endOnInput)?ip<iend-RUN_MASK:1) && (s==255));
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* overflow detection */
+            if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* overflow detection */
+        }
+
+        /* copy literals */
+        cpy = op+length;
+        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
+        {
+            if (partialDecoding)
+            {
+                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+            }
+            else
+            {
+                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+            }
+            memcpy(op, ip, length);
+            ip += length;
+            op += length;
+            break;     /* Necessarily EOF, due to parsing restrictions */
+        }
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        match = cpy - LZ4_readLE16(ip); ip+=2;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside destination buffer */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK)
+        {
+            unsigned s;
+            do
+            {
+                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
+                s = *ip++;
+                length += s;
+            } while (s==255);
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error;   /* overflow detection */
+        }
+        length += MINMATCH;
+
+        /* check external dictionary */
+        if ((dict==usingExtDict) && (match < lowPrefix))
+        {
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+
+            if (length <= (size_t)(lowPrefix-match))
+            {
+                /* match can be copied as a single segment from external dictionary */
+                match = dictEnd - (lowPrefix-match);
+                memmove(op, match, length); op += length;
+            }
+            else
+            {
+                /* match encompass external dictionary and current segment */
+                size_t copySize = (size_t)(lowPrefix-match);
+                memcpy(op, dictEnd - copySize, copySize);
+                op += copySize;
+                copySize = length - copySize;
+                if (copySize > (size_t)(op-lowPrefix))   /* overlap within current segment */
+                {
+                    BYTE* const endOfMatch = op + copySize;
+                    const BYTE* copyFrom = lowPrefix;
+                    while (op < endOfMatch) *op++ = *copyFrom++;
+                }
+                else
+                {
+                    memcpy(op, lowPrefix, copySize);
+                    op += copySize;
+                }
+            }
+            continue;
+        }
+
+        /* copy repeated sequence */
+        cpy = op + length;
+        if (unlikely((op-match)<8))
+        {
+            const size_t dec64 = dec64table[op-match];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[op-match];
+            LZ4_copy4(op+4, match);
+            op += 8; match -= dec64;
+        } else { LZ4_copy8(op, match); op+=8; match+=8; }
+
+        if (unlikely(cpy>oend-12))
+        {
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals */
+            if (op < oend-8)
+            {
+                LZ4_wildCopy(op, match, oend-8);
+                match += (oend-8) - op;
+                op = oend-8;
+            }
+            while (op<cpy) *op++ = *match++;
+        }
+        else
+            LZ4_wildCopy(op, match, cpy);
+        op=cpy;   /* correction */
+    }
+
+    /* end of decoding */
+    if (endOnInput)
+       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
+    else
+       return (int) (((const char*)ip)-source);   /* Nb of input bytes read */
+
+    /* Overflow error detected */
+_output_error:
+    return (int) (-(((const char*)ip)-source))-1;
+}
+
+
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
+}
+
+int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
+}
+
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
+}
+
+
+/* streaming decompression functions */
+
+typedef struct
+{
+    const BYTE* externalDict;
+    size_t extDictSize;
+    const BYTE* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+/*
+ * If you prefer dynamic allocation methods,
+ * LZ4_createStreamDecode()
+ * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
+ */
+LZ4_streamDecode_t* LZ4_createStreamDecode(void)
+{
+    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t));
+    return lz4s;
+}
+
+int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
+{
+    FREEMEM(LZ4_stream);
+    return 0;
+}
+
+/*
+ * LZ4_setStreamDecode
+ * Use this function to instruct where to find the dictionary
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    lz4sd->prefixSize = (size_t) dictSize;
+    lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
+    lz4sd->externalDict = NULL;
+    lz4sd->extDictSize  = 0;
+    return 1;
+}
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    If it's not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate where it stands using LZ4_setStreamDecode()
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest)
+    {
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += result;
+        lz4sd->prefixEnd  += result;
+    }
+    else
+    {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = result;
+        lz4sd->prefixEnd  = (BYTE*)dest + result;
+    }
+
+    return result;
+}
+
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest)
+    {
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += originalSize;
+        lz4sd->prefixEnd  += originalSize;
+    }
+    else
+    {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
+    }
+
+    return result;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as "_continue" ones,
+    the dictionary must be explicitly provided within parameters
+*/
+
+FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
+{
+    if (dictSize==0)
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
+    if (dictStart+dictSize == dest)
+    {
+        if (dictSize >= (int)(64 KB - 1))
+            return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0);
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
+    }
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
+}
+
+/* debug function */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+
+/***************************************************
+*  Obsolete Functions
+***************************************************/
+/* obsolete compression functions */
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); }
+int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); }
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); }
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); }
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); }
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); }
+
+/*
+These function names are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
+
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
+
+static void LZ4_init(LZ4_stream_t_internal* lz4ds, BYTE* base)
+{
+    MEM_INIT(lz4ds, 0, LZ4_STREAMSIZE);
+    lz4ds->bufferStart = base;
+}
+
+int LZ4_resetStreamState(void* state, char* inputBuffer)
+{
+    if ((((size_t)state) & 3) != 0) return 1;   /* Error : pointer is not aligned on 4-bytes boundary */
+    LZ4_init((LZ4_stream_t_internal*)state, (BYTE*)inputBuffer);
+    return 0;
+}
+
+void* LZ4_create (char* inputBuffer)
+{
+    void* lz4ds = ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_init ((LZ4_stream_t_internal*)lz4ds, (BYTE*)inputBuffer);
+    return lz4ds;
+}
+
+char* LZ4_slideInputBuffer (void* LZ4_Data)
+{
+    LZ4_stream_t_internal* ctx = (LZ4_stream_t_internal*)LZ4_Data;
+    int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB);
+    return (char*)(ctx->bufferStart + dictSize);
+}
+
+/* Obsolete streaming decompression functions */
+
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+}
+
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+}
+
+#endif   /* LZ4_COMMONDEFS_ONLY */
+
diff --git a/thirdparty/etcpak/lz4/lz4.h b/thirdparty/etcpak/lz4/lz4.h
new file mode 100644
index 0000000000..3e74002256
--- /dev/null
+++ b/thirdparty/etcpak/lz4/lz4.h
@@ -0,0 +1,360 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Header File
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * lz4.h provides block compression functions, and gives full buffer control to programmer.
+ * If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
+ * and can let the library handle its own memory, please use lz4frame.h instead.
+*/
+
+/**************************************
+*  Version
+**************************************/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+int LZ4_versionNumber (void);
+
+/**************************************
+*  Tuning parameter
+**************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 14
+
+
+/**************************************
+*  Simple Functions
+**************************************/
+
+int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
+int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+
+/*
+LZ4_compress_default() :
+    Compresses 'sourceSize' bytes from buffer 'source'
+    into already allocated 'dest' buffer of size 'maxDestSize'.
+    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
+    It also runs faster, so it's a recommended setting.
+    If the function cannot compress 'source' into a more limited 'dest' budget,
+    compression stops *immediately*, and the function result is zero.
+    As a consequence, 'dest' content is not valid.
+    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
+        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
+        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
+        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
+              or 0 if compression fails
+
+LZ4_decompress_safe() :
+    compressedSize : is the precise full size of the compressed block.
+    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
+             If destination buffer is not large enough, decoding will stop and output an error code (<0).
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function is protected against buffer overflow exploits, including malicious data packets.
+             It never writes outside output buffer, nor reads outside input buffer.
+*/
+
+
+/**************************************
+*  Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*
+LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (destination buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
+        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
+        return : maximum output size in a "worst case" scenario
+              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+*/
+int LZ4_compressBound(int inputSize);
+
+/*
+LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+*/
+int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
+
+
+/*
+LZ4_compress_fast_extState() :
+    Same compression function, just using an externally allocated memory space to store compression state.
+    Use LZ4_sizeofState() to know how much memory must be allocated,
+    and allocate it on 8-bytes boundaries (using malloc() typically).
+    Then, provide it as 'void* state' to compression function.
+*/
+int LZ4_sizeofState(void);
+int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
+
+
+/*
+LZ4_compress_destSize() :
+    Reverse the logic, by compressing as much data as possible from 'source' buffer
+    into already allocated buffer 'dest' of size 'targetDestSize'.
+    This function either compresses the entire 'source' content into 'dest' if it's large enough,
+    or fill 'dest' buffer completely with as much data as possible from 'source'.
+        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
+                         New value is necessarily <= old value.
+        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+              or 0 if compression fails
+*/
+int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
+
+
+/*
+LZ4_decompress_fast() :
+    originalSize : is the original and therefore uncompressed size
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
+    note : This function fully respect memory boundaries for properly formed compressed data.
+           It is a bit faster than LZ4_decompress_safe().
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
+           Use this function in trusted environment only (data to decode comes from a trusted source).
+*/
+int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
+
+/*
+LZ4_decompress_safe_partial() :
+    This function decompress a compressed block of size 'compressedSize' at position 'source'
+    into destination buffer 'dest' of size 'maxDecompressedSize'.
+    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
+    reducing decompression time.
+    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
+       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
+             Always control how many bytes were decoded.
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+*/
+int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/***********************************************
+*  Streaming Compression Functions
+***********************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
+/*
+ * LZ4_stream_t
+ * information structure to track an LZ4 stream.
+ * important : init this structure content before first use !
+ * note : only allocated directly the structure if you are statically linking LZ4
+ *        If you are using liblz4 as a DLL, please use below construction methods instead.
+ */
+typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
+
+/*
+ * LZ4_resetStream
+ * Use this function to init an allocated LZ4_stream_t structure
+ */
+void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+/*
+ * LZ4_createStream will allocate and initialize an LZ4_stream_t structure
+ * LZ4_freeStream releases its memory.
+ * In the context of a DLL (liblz4), please use these methods rather than the static struct.
+ * They are more future proof, in case of a change of LZ4_stream_t size.
+ */
+LZ4_stream_t* LZ4_createStream(void);
+int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+
+/*
+ * LZ4_loadDict
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary' will remain in memory.
+ * Loading a size of 0 is allowed.
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*
+ * LZ4_compress_fast_continue
+ * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still be present and unmodified !
+ * 'dst' buffer must be already allocated.
+ * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
+ */
+int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
+
+/*
+ * LZ4_saveDict
+ * If previously compressed data block is not guaranteed to remain available at its memory location
+ * save it into a safer place (char* safeBuffer)
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ *        dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue()
+ * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error
+ */
+int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
+
+
+/************************************************
+*  Streaming Decompression Functions
+************************************************/
+
+#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
+/*
+ * LZ4_streamDecode_t
+ * information structure to track an LZ4 stream.
+ * init this structure content using LZ4_setStreamDecode or memset() before first use !
+ *
+ * In the context of a DLL (liblz4) please prefer usage of construction methods below.
+ * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
+ * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
+ * LZ4_freeStreamDecode releases its memory.
+ */
+LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*
+ * LZ4_setStreamDecode
+ * Use this function to instruct where to find the dictionary.
+ * Setting a size of 0 is allowed (same effect as reset).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
+    In the case of a ring buffers, decoding buffer must be either :
+    - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
+      In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
+    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+      maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
+      In which case, encoding and decoding buffers do not need to be synchronized,
+      and encoding ring buffer can have any size, including small ones ( < 64 KB).
+    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+      In which case, encoding and decoding buffers do not need to be synchronized,
+      and encoding ring buffer can have any size, including larger than decoding buffer.
+    Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
+    and indicate where it is saved using LZ4_setStreamDecode()
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as
+    a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
+    They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure.
+*/
+int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
+int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
+
+
+
+/**************************************
+*  Obsolete Functions
+**************************************/
+/* Deprecate Warnings */
+/* Should these warnings messages be a problem,
+   it is generally possible to disable them,
+   with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual for example.
+   You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
+#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */
+
+/* Obsolete compression functions */
+/* These functions are planned to start generate warnings by r131 approximately */
+int LZ4_compress               (const char* source, char* dest, int sourceSize);
+int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete decompression functions */
+/* These function names are completely deprecated and must no longer be used.
+   They are only provided here for compatibility with older programs.
+    - LZ4_uncompress is the same as LZ4_decompress_fast
+    - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
+   These function prototypes are now disabled; uncomment them only if you really need them.
+   It is highly recommended to stop using these prototypes and migrate to maintained ones */
+/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
+/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */
+
+/* Obsolete streaming functions; use new streaming interface whenever possible */
+LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
+
+/* Obsolete streaming decoding functions */
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/thirdparty/etcpak/mmap.cpp b/thirdparty/etcpak/mmap.cpp
new file mode 100644
index 0000000000..c2460ee9e4
--- /dev/null
+++ b/thirdparty/etcpak/mmap.cpp
@@ -0,0 +1,38 @@
+#include "mmap.hpp"
+
+#ifdef _WIN32
+#  include <io.h>
+#  include <windows.h>
+
+void* mmap( void* addr, size_t length, int prot, int flags, int fd, off_t offset )
+{
+    HANDLE hnd;
+    void* map = nullptr;
+
+    switch( prot )
+    {
+    case PROT_READ:
+        if( hnd = CreateFileMapping( HANDLE( _get_osfhandle( fd ) ), nullptr, PAGE_READONLY, 0, DWORD( length ), nullptr ) )
+        {
+            map = MapViewOfFile( hnd, FILE_MAP_READ, 0, 0, length );
+            CloseHandle( hnd );
+        }
+        break;
+    case PROT_WRITE:
+        if( hnd = CreateFileMapping( HANDLE( _get_osfhandle( fd ) ), nullptr, PAGE_READWRITE, 0, DWORD( length ), nullptr ) )
+        {
+            map = MapViewOfFile( hnd, FILE_MAP_WRITE, 0, 0, length );
+            CloseHandle( hnd );
+        }
+        break;
+    }
+
+    return map ? (char*)map + offset : (void*)-1;
+}
+
+int munmap( void* addr, size_t length )
+{
+    return UnmapViewOfFile( addr ) != 0 ? 0 : -1;
+}
+
+#endif
diff --git a/thirdparty/etcpak/mmap.hpp b/thirdparty/etcpak/mmap.hpp
new file mode 100644
index 0000000000..e4cfe7759c
--- /dev/null
+++ b/thirdparty/etcpak/mmap.hpp
@@ -0,0 +1,19 @@
+#ifndef __MMAP_HPP__
+#define __MMAP_HPP__
+
+#ifndef _WIN32
+#  include <sys/mman.h>
+#else
+#  include <string.h>
+#  include <sys/types.h>
+
+#  define PROT_READ 1
+#  define PROT_WRITE 2
+#  define MAP_SHARED 0
+
+void* mmap( void* addr, size_t length, int prot, int flags, int fd, off_t offset );
+int munmap( void* addr, size_t length );
+
+#endif
+
+#endif
diff --git a/thirdparty/etcpak/patches/libpng-unbundle.patch b/thirdparty/etcpak/patches/libpng-unbundle.patch
new file mode 100644
index 0000000000..e3c07412c6
--- /dev/null
+++ b/thirdparty/etcpak/patches/libpng-unbundle.patch
@@ -0,0 +1,13 @@
+diff --git a/thirdparty/etcpak/Bitmap.cpp b/thirdparty/etcpak/Bitmap.cpp
+index 6aa36f5caa..ef318318ac 100644
+--- a/thirdparty/etcpak/Bitmap.cpp
++++ b/thirdparty/etcpak/Bitmap.cpp
+@@ -3,7 +3,7 @@
+ #include <string.h>
+ #include <assert.h>
+ 
+-#include "libpng/png.h"
++#include <png.h>
+ #include "lz4/lz4.h"
+ 
+ #include "Bitmap.hpp"
diff --git a/thirdparty/etcpak/patches/llvm-c++11-narrowing-errors.patch b/thirdparty/etcpak/patches/llvm-c++11-narrowing-errors.patch
new file mode 100644
index 0000000000..ab0d1e63a2
--- /dev/null
+++ b/thirdparty/etcpak/patches/llvm-c++11-narrowing-errors.patch
@@ -0,0 +1,64 @@
+diff --git a/thirdparty/etcpak/BlockData.cpp b/thirdparty/etcpak/BlockData.cpp
+index bd738085f3..395b55246b 100644
+--- a/thirdparty/etcpak/BlockData.cpp
++++ b/thirdparty/etcpak/BlockData.cpp
+@@ -334,10 +334,10 @@ static etcpak_force_inline void DecodeT( uint64_t block, uint32_t* dst, uint32_t
+     const auto c3b = clampu8( cb1 - table59T58H[codeword] );
+ 
+     const uint32_t col_tab[4] = {
+-        cr0 | ( cg0 << 8 ) | ( cb0 << 16 ) | 0xFF000000,
+-        c2r | ( c2g << 8 ) | ( c2b << 16 ) | 0xFF000000,
+-        cr1 | ( cg1 << 8 ) | ( cb1 << 16 ) | 0xFF000000,
+-        c3r | ( c3g << 8 ) | ( c3b << 16 ) | 0xFF000000
++        uint32_t(cr0 | ( cg0 << 8 ) | ( cb0 << 16 ) | 0xFF000000),
++        uint32_t(c2r | ( c2g << 8 ) | ( c2b << 16 ) | 0xFF000000),
++        uint32_t(cr1 | ( cg1 << 8 ) | ( cb1 << 16 ) | 0xFF000000),
++        uint32_t(c3r | ( c3g << 8 ) | ( c3b << 16 ) | 0xFF000000)
+     };
+ 
+     const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;
+@@ -389,10 +389,10 @@ static etcpak_force_inline void DecodeTAlpha( uint64_t block, uint64_t alpha, ui
+     const auto c3b = clampu8( cb1 - table59T58H[codeword] );
+ 
+     const uint32_t col_tab[4] = {
+-        cr0 | ( cg0 << 8 ) | ( cb0 << 16 ),
+-        c2r | ( c2g << 8 ) | ( c2b << 16 ),
+-        cr1 | ( cg1 << 8 ) | ( cb1 << 16 ),
+-        c3r | ( c3g << 8 ) | ( c3b << 16 )
++        uint32_t(cr0 | ( cg0 << 8 ) | ( cb0 << 16 )),
++        uint32_t(c2r | ( c2g << 8 ) | ( c2b << 16 )),
++        uint32_t(cr1 | ( cg1 << 8 ) | ( cb1 << 16 )),
++        uint32_t(c3r | ( c3g << 8 ) | ( c3b << 16 ))
+     };
+ 
+     const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;
+@@ -436,10 +436,10 @@ static etcpak_force_inline void DecodeH( uint64_t block, uint32_t* dst, uint32_t
+     const auto codeword = codeword_hi | codeword_lo;
+ 
+     const uint32_t col_tab[] = {
+-        clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 ),
+-        clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 ),
+-        clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 ),
+-        clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 )
++        uint32_t(clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 )),
++        uint32_t(clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 )),
++        uint32_t(clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 )),
++        uint32_t(clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 ))
+     };
+ 
+     for( uint8_t j = 0; j < 4; j++ )
+@@ -483,10 +483,10 @@ static etcpak_force_inline void DecodeHAlpha( uint64_t block, uint64_t alpha, ui
+     const auto tbl = g_alpha[(alpha >> 48) & 0xF];
+ 
+     const uint32_t col_tab[] = {
+-        clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 ),
+-        clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 ),
+-        clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 ),
+-        clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 )
++        uint32_t(clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 )),
++        uint32_t(clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 )),
++        uint32_t(clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 )),
++        uint32_t(clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 ))
+     };
+ 
+     for( uint8_t j = 0; j < 4; j++ )
diff --git a/thirdparty/etcpak/patches/pthread-setname.patch b/thirdparty/etcpak/patches/pthread-setname.patch
new file mode 100644
index 0000000000..e2b009a1b3
--- /dev/null
+++ b/thirdparty/etcpak/patches/pthread-setname.patch
@@ -0,0 +1,66 @@
+diff --git a/thirdparty/etcpak/System.cpp b/thirdparty/etcpak/System.cpp
+index 1383d0ecd0..041f2676e8 100644
+--- a/thirdparty/etcpak/System.cpp
++++ b/thirdparty/etcpak/System.cpp
+@@ -2,7 +2,6 @@
+ #ifdef _WIN32
+ #  include <windows.h>
+ #else
+-#  include <pthread.h>
+ #  include <unistd.h>
+ #endif
+ 
+@@ -35,7 +34,7 @@ unsigned int System::CPUCores()
+ 
+ void System::SetThreadName( std::thread& thread, const char* name )
+ {
+-#ifdef _WIN32
++#ifdef _MSC_VER
+     const DWORD MS_VC_EXCEPTION=0x406D1388;
+ 
+ #  pragma pack( push, 8 )
+@@ -62,7 +61,5 @@ void System::SetThreadName( std::thread& thread, const char* name )
+     __except(EXCEPTION_EXECUTE_HANDLER)
+     {
+     }
+-#elif !defined(__APPLE__)
+-    pthread_setname_np( thread.native_handle(), name );
+ #endif
+ }
+diff --git a/thirdparty/etcpak/TaskDispatch.cpp b/thirdparty/etcpak/TaskDispatch.cpp
+index 7287da4de2..b1ba17953b 100644
+--- a/thirdparty/etcpak/TaskDispatch.cpp
++++ b/thirdparty/etcpak/TaskDispatch.cpp
+@@ -1,5 +1,8 @@
+ #include <assert.h>
+ #include <stdio.h>
++#ifndef _MSC_VER
++#include <pthread.h>
++#endif
+ 
+ #include "Debug.hpp"
+ #include "System.hpp"
+@@ -22,15 +25,19 @@ TaskDispatch::TaskDispatch( size_t workers )
+     {
+         char tmp[16];
+         sprintf( tmp, "Worker %zu", i );
+-#ifdef __APPLE__
++#ifdef _MSC_VER
++        auto worker = std::thread( [this]{ Worker(); } );
++        System::SetThreadName( worker, tmp );
++#else // Using pthread.
+         auto worker = std::thread( [this, tmp]{
++#ifdef __APPLE__
+             pthread_setname_np( tmp );
++#else // Linux or MinGW.
++            pthread_setname_np( pthread_self(), tmp );
++#endif
+             Worker();
+         } );
+-#else
+-        auto worker = std::thread( [this]{ Worker(); } );
+ #endif
+-        System::SetThreadName( worker, tmp );
+         m_workers.emplace_back( std::move( worker ) );
+     }
+ 
diff --git a/thirdparty/etcpak/patches/windows-mingw-bswap.patch b/thirdparty/etcpak/patches/windows-mingw-bswap.patch
new file mode 100644
index 0000000000..c09192f573
--- /dev/null
+++ b/thirdparty/etcpak/patches/windows-mingw-bswap.patch
@@ -0,0 +1,50 @@
+diff --git a/thirdparty/etcpak/BlockData.cpp b/thirdparty/etcpak/BlockData.cpp
+index a2cd032c5b..bd738085f3 100644
+--- a/thirdparty/etcpak/BlockData.cpp
++++ b/thirdparty/etcpak/BlockData.cpp
+@@ -15,7 +15,7 @@
+ #  include <arm_neon.h>
+ #endif
+ 
+-#ifdef __SSE4_1__
++#if defined __SSE4_1__ || defined __AVX2__ || defined _MSC_VER
+ #  ifdef _MSC_VER
+ #    include <intrin.h>
+ #    include <Windows.h>
+@@ -24,12 +24,6 @@
+ #  else
+ #    include <x86intrin.h>
+ #  endif
+-#else
+-#  ifndef _MSC_VER
+-#    include <byteswap.h>
+-#    define _bswap(x) bswap_32(x)
+-#    define _bswap64(x) bswap_64(x)
+-#  endif
+ #endif
+ 
+ #ifndef _bswap
+diff --git a/thirdparty/etcpak/ProcessRGB.cpp b/thirdparty/etcpak/ProcessRGB.cpp
+index 220d5c55e2..9dc5a78b67 100644
+--- a/thirdparty/etcpak/ProcessRGB.cpp
++++ b/thirdparty/etcpak/ProcessRGB.cpp
+@@ -1,5 +1,6 @@
+ #include <array>
+ #include <string.h>
++#include <limits>
+ 
+ #ifdef __ARM_NEON
+ #  include <arm_neon.h>
+@@ -21,12 +22,6 @@
+ #  else
+ #    include <x86intrin.h>
+ #  endif
+-#else
+-#  ifndef _MSC_VER
+-#    include <byteswap.h>
+-#    define _bswap(x) bswap_32(x)
+-#    define _bswap64(x) bswap_64(x)
+-#  endif
+ #endif
+ 
+ #ifndef _bswap