338 files changed, 10296 insertions, 16068 deletions
diff --git a/.github/workflows/linux_builds.yml b/.github/workflows/linux_builds.yml
index 15a7be9c4f..7b144e6e43 100644
--- a/.github/workflows/linux_builds.yml
+++ b/.github/workflows/linux_builds.yml
@@ -76,9 +76,9 @@ jobs:
           path: bin/*
           retention-days: 14
 
-  linux-editor-sanitizers-mono:
+  linux-editor-sanitizers:
     runs-on: "ubuntu-20.04"
-    name: Editor w/ Mono and sanitizers (target=debug, tools=yes, tests=yes, use_asan=yes, use_ubsan=yes)
+    name: Editor and sanitizers (target=debug, tools=yes, tests=yes, use_asan=yes, use_ubsan=yes)
 
     steps:
       - uses: actions/checkout@v2
@@ -94,7 +94,8 @@ jobs:
       - name: Configure dependencies
         run: |
           sudo apt-get install build-essential pkg-config libx11-dev libxcursor-dev \
-            libxinerama-dev libgl1-mesa-dev libglu-dev libasound2-dev libpulse-dev libudev-dev libxi-dev libxrandr-dev yasm
+            libxinerama-dev libgl1-mesa-dev libglu-dev libasound2-dev libpulse-dev libudev-dev libxi-dev libxrandr-dev yasm \
+            xvfb wget unzip
 
       # Upload cache on completion and check it out now
       - name: Load .scons_cache directory
@@ -126,17 +127,47 @@ jobs:
           scons --version
 
       # We should always be explicit with our flags usage here since it's gonna be sure to always set those flags
+      # [Workaround] SwiftShader doesn't support tessellation, so we disable Godot's check for it
       - name: Compilation
         env:
           SCONS_CACHE: ${{github.workspace}}/.scons_cache/
         run: |
-          scons tools=yes tests=yes target=debug module_mono_enabled=yes mono_glue=no use_asan=yes use_ubsan=yes
+          sed -i "s|ERR_FAIL_COND_V(p_rasterization_state.patch_control_points|//ERR_FAIL_COND_V(p_rasterization_state.patch_control_points|" drivers/vulkan/rendering_device_vulkan.cpp
+          scons tools=yes tests=yes target=debug debug_symbols=no use_asan=yes use_ubsan=yes
           ls -l bin/
 
       # Execute unit tests for the editor
       - name: Unit Tests
         run: |
-          ./bin/godot.linuxbsd.tools.64s.mono --test
+          ./bin/godot.linuxbsd.tools.64s --test
+
+      # Download, unzip and set up the SwiftShader library [d4550ab8d3f]
+      - name: Download SwiftShader
+        run: |
+          wget https://github.com/qarmin/gtk_library_store/releases/download/3.24.0/swiftshader.zip
+          unzip swiftshader.zip
+          rm swiftshader.zip
+          curr="$(pwd)/libvk_swiftshader.so"
+          sed -i "s|PATH_TO_CHANGE|$curr|" vk_swiftshader_icd.json
+
+      # Download and extract the zip archive with the test project; the folder is renamed so the project in use can be changed easily
+      - name: Download test project
+        run: |
+          wget https://github.com/qarmin/RegressionTestProject/archive/4.0.zip
+          unzip 4.0.zip
+          mv "RegressionTestProject-4.0" "test_project"
+
+      # The editor is quite a complicated piece of software, so it is easy to introduce bugs here
+      - name: Open and close editor
+        run: |
+          VK_ICD_FILENAMES=$(pwd)/vk_swiftshader_icd.json DRI_PRIME=0 xvfb-run bin/godot.linuxbsd.tools.64s --audio-driver Dummy -e -q --path test_project 2>&1 | tee sanitizers_log.txt || true
+          misc/scripts/check_ci_log.py sanitizers_log.txt
+
+      # Run test project
+      - name: Run project
+        run: |
+          VK_ICD_FILENAMES=$(pwd)/vk_swiftshader_icd.json DRI_PRIME=0 xvfb-run bin/godot.linuxbsd.tools.64s 40 --audio-driver Dummy --path test_project 2>&1 | tee sanitizers_log.txt || true
+          misc/scripts/check_ci_log.py sanitizers_log.txt
 
   linux-template-mono:
     runs-on: "ubuntu-20.04"
diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt
index 6684978318..3bb9c916fd 100644
--- a/COPYRIGHT.txt
+++ b/COPYRIGHT.txt
@@ -89,6 +89,8 @@ Files: ./servers/physics_3d/gjk_epa.cpp
  ./servers/physics_3d/joints/slider_joint_3d_sw.h
  ./servers/physics_3d/soft_body_3d_sw.cpp
  ./servers/physics_3d/soft_body_3d_sw.h
+ ./servers/physics_3d/shape_3d_sw.cpp
+ ./servers/physics_3d/shape_3d_sw.h
 Comment: Bullet Continuous Collision Detection and Physics Library
 Copyright: 2003-2008, Erwin Coumans
  2007-2021, Juan Linietsky, Ariel Manzur.
@@ -134,10 +136,10 @@ Comment: ENet
 Copyright: 2002-2020, Lee Salzman
 License: Expat
 
-Files: ./thirdparty/etc2comp/
-Comment: Etc2Comp
-Copyright: 2015, Etc2Comp Authors
-License: Apache-2.0
+Files: ./thirdparty/etcpak/
+Comment: etcpak
+Copyright: 2013-2021, Bartosz Taudul
+License: BSD-3-clause
 
 Files: ./thirdparty/fonts/DroidSans*.ttf
 Comment: DroidSans font
@@ -261,7 +263,7 @@ License: Apache-2.0
 
 Files: ./thirdparty/meshoptimizer/
 Comment: meshoptimizer
-Copyright: 2016-2020, Arseny Kapoulkine
+Copyright: 2016-2021, Arseny Kapoulkine
 License: Expat
 
 Files: ./thirdparty/minimp3/
diff --git a/SConstruct b/SConstruct
index 3edf81129b..2d9802f293 100644
--- a/SConstruct
+++ b/SConstruct
@@ -137,6 +137,7 @@ opts.Add("extra_suffix", "Custom extra suffix added to the base filename of all
 opts.Add(BoolVariable("vsproj", "Generate a Visual Studio solution", False))
 opts.Add(BoolVariable("disable_3d", "Disable 3D nodes for a smaller executable", False))
 opts.Add(BoolVariable("disable_advanced_gui", "Disable advanced GUI nodes and behaviors", False))
+opts.Add(BoolVariable("modules_enabled_by_default", "If no, disable all modules except ones explicitly enabled", True))
 opts.Add(BoolVariable("no_editor_splash", "Don't use the custom splash screen for the editor", False))
 opts.Add("system_certs_path", "Use this path as SSL certificates default for editor (for package maintainers)", "")
 opts.Add(BoolVariable("use_precise_math_checks", "Math checks use very precise epsilon (debug option)", False))
@@ -259,16 +260,21 @@ for path in module_search_paths:
 
 # Add module options.
 for name, path in modules_detected.items():
-    enabled = True
-    sys.path.insert(0, path)
-    import config
-
-    try:
-        enabled = config.is_enabled()
-    except AttributeError:
-        pass
-    sys.path.remove(path)
-    sys.modules.pop("config")
+    if env_base["modules_enabled_by_default"]:
+        enabled = True
+
+        sys.path.insert(0, path)
+        import config
+
+        try:
+            enabled = config.is_enabled()
+        except AttributeError:
+            pass
+        sys.path.remove(path)
+        sys.modules.pop("config")
+    else:
+        enabled = False
+
     opts.Add(BoolVariable("module_" + name + "_enabled", "Enable module '%s'" % (name,), enabled))
 
 methods.write_modules(modules_detected)
diff --git a/core/config/project_settings.cpp b/core/config/project_settings.cpp
index f87dc6704e..25dd408dce 100644
--- a/core/config/project_settings.cpp
+++ b/core/config/project_settings.cpp
@@ -467,16 +467,17 @@ Error ProjectSettings::_setup(const String &p_path, const String &p_main_pack, b
 	d->change_dir(p_path);
 
 	String current_dir = d->get_current_dir();
-	String candidate = current_dir;
 	bool found = false;
 	Error err;
 
 	while (true) {
+		// Set the resource path early so things can be resolved when loading.
+		resource_path = current_dir;
+		resource_path = resource_path.replace("\\", "/"); // Windows path to Unix path just in case.
 		err = _load_settings_text_or_binary(current_dir.plus_file("project.godot"), current_dir.plus_file("project.binary"));
 		if (err == OK) {
 			// Optional, we don't mind if it fails.
 			_load_settings_text(current_dir.plus_file("override.cfg"));
-			candidate = current_dir;
 			found = true;
 			break;
 		}
@@ -493,8 +494,6 @@ Error ProjectSettings::_setup(const String &p_path, const String &p_main_pack, b
 		}
 	}
 
-	resource_path = candidate;
-	resource_path = resource_path.replace("\\", "/"); // Windows path to Unix path just in case.
 	memdelete(d);
 
 	if (!found) {
diff --git a/core/input/input.cpp b/core/input/input.cpp
index 627944210f..2304c05bf8 100644
--- a/core/input/input.cpp
+++ b/core/input/input.cpp
@@ -1329,9 +1329,10 @@ void Input::add_joy_mapping(String p_mapping, bool p_update_existing) {
 	if (p_update_existing) {
 		Vector<String> entry = p_mapping.split(",");
 		String uid = entry[0];
-		for (int i = 0; i < joy_names.size(); i++) {
-			if (uid == joy_names[i].uid) {
-				joy_names[i].mapping = map_db.size() - 1;
+		for (Map<int, Joypad>::Element *E = joy_names.front(); E; E = E->next()) {
+			Joypad &joy = E->get();
+			if (joy.uid == uid) {
+				joy.mapping = map_db.size() - 1;
 			}
 		}
 	}
@@ -1343,9 +1344,10 @@ void Input::remove_joy_mapping(String p_guid) {
 			map_db.remove(i);
 		}
 	}
-	for (int i = 0; i < joy_names.size(); i++) {
-		if (joy_names[i].uid == p_guid) {
-			joy_names[i].mapping = -1;
+	for (Map<int, Joypad>::Element *E = joy_names.front(); E; E = E->next()) {
+		Joypad &joy = E->get();
+		if (joy.uid == p_guid) {
+			joy.mapping = -1;
 		}
 	}
 }
@@ -1361,8 +1363,13 @@ void Input::set_fallback_mapping(String p_guid) {
 
 //platforms that use the remapping system can override and call to these ones
 bool Input::is_joy_known(int p_device) {
-	int mapping = joy_names[p_device].mapping;
-	return mapping != -1 ? (mapping != fallback_mapping) : false;
+	if (joy_names.has(p_device)) {
+		int mapping = joy_names[p_device].mapping;
+		if (mapping != -1 && mapping != fallback_mapping) {
+			return true;
+		}
+	}
+	return false;
 }
 
 String Input::get_joy_guid(int p_device) const {
diff --git a/core/input/input_map.cpp b/core/input/input_map.cpp
index 7d85fd6492..aab4e6593c 100644
--- a/core/input/input_map.cpp
+++ b/core/input/input_map.cpp
@@ -54,8 +54,36 @@ void InputMap::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("load_from_project_settings"), &InputMap::load_from_project_settings);
 }
 
+/**
+ * Returns a "nonexistent action" error message for `p_action`, with a suggestion
+ * of the closest matching existing action name appended (if one is similar enough).
+ */
+String InputMap::_suggest_actions(const StringName &p_action) const {
+	List<StringName> actions = get_actions();
+	StringName closest_action;
+	float closest_similarity = 0.0;
+
+	// Find the action with the most similar name.
+	for (List<StringName>::Element *E = actions.front(); E; E = E->next()) {
+		const float similarity = String(E->get()).similarity(p_action);
+
+		if (similarity > closest_similarity) {
+			closest_action = E->get();
+			closest_similarity = similarity;
+		}
+	}
+
+	String error_message = vformat("The InputMap action \"%s\" doesn't exist.", p_action);
+
+	if (closest_similarity >= 0.4) {
+		// Only include a suggestion in the error message if it's similar enough.
+		error_message += vformat(" Did you mean \"%s\"?", closest_action);
+	}
+	return error_message;
+}
+
 void InputMap::add_action(const StringName &p_action, float p_deadzone) {
-	ERR_FAIL_COND_MSG(input_map.has(p_action), "InputMap already has action '" + String(p_action) + "'.");
+	ERR_FAIL_COND_MSG(input_map.has(p_action), "InputMap already has action \"" + String(p_action) + "\".");
 	input_map[p_action] = Action();
 	static int last_id = 1;
 	input_map[p_action].id = last_id;
@@ -64,7 +92,8 @@ void InputMap::add_action(const StringName &p_action, float p_deadzone) {
 }
 
 void InputMap::erase_action(const StringName &p_action) {
-	ERR_FAIL_COND_MSG(!input_map.has(p_action), "Request for nonexistent InputMap action '" + String(p_action) + "'.");
+	ERR_FAIL_COND_MSG(!input_map.has(p_action), _suggest_actions(p_action));
+
 	input_map.erase(p_action);
 }
 
@@ -122,20 +151,20 @@ bool InputMap::has_action(const StringName &p_action) const {
 }
 
 float InputMap::action_get_deadzone(const StringName &p_action) {
-	ERR_FAIL_COND_V_MSG(!input_map.has(p_action), 0.0f, "Request for nonexistent InputMap action '" + String(p_action) + "'.");
+	ERR_FAIL_COND_V_MSG(!input_map.has(p_action), 0.0f, _suggest_actions(p_action));
 
 	return input_map[p_action].deadzone;
 }
 
 void InputMap::action_set_deadzone(const StringName &p_action, float p_deadzone) {
-	ERR_FAIL_COND_MSG(!input_map.has(p_action), "Request for nonexistent InputMap action '" + String(p_action) + "'.");
+	ERR_FAIL_COND_MSG(!input_map.has(p_action), _suggest_actions(p_action));
 
 	input_map[p_action].deadzone = p_deadzone;
 }
 
 void InputMap::action_add_event(const StringName &p_action, const Ref<InputEvent> &p_event) {
 	ERR_FAIL_COND_MSG(p_event.is_null(), "It's not a reference to a valid InputEvent object.");
-	ERR_FAIL_COND_MSG(!input_map.has(p_action), "Request for nonexistent InputMap action '" + String(p_action) + "'.");
+	ERR_FAIL_COND_MSG(!input_map.has(p_action), _suggest_actions(p_action));
 	if (_find_event(input_map[p_action], p_event, true)) {
 		return; // Already addded.
 	}
@@ -144,12 +173,12 @@ void InputMap::action_add_event(const StringName &p_action, const Ref<InputEvent
 }
 
 bool InputMap::action_has_event(const StringName &p_action, const Ref<InputEvent> &p_event) {
-	ERR_FAIL_COND_V_MSG(!input_map.has(p_action), false, "Request for nonexistent InputMap action '" + String(p_action) + "'.");
+	ERR_FAIL_COND_V_MSG(!input_map.has(p_action), false, _suggest_actions(p_action));
 	return (_find_event(input_map[p_action], p_event, true) != nullptr);
 }
 
 void InputMap::action_erase_event(const StringName &p_action, const Ref<InputEvent> &p_event) {
-	ERR_FAIL_COND_MSG(!input_map.has(p_action), "Request for nonexistent InputMap action '" + String(p_action) + "'.");
+	ERR_FAIL_COND_MSG(!input_map.has(p_action), _suggest_actions(p_action));
 
 	List<Ref<InputEvent>>::Element *E = _find_event(input_map[p_action], p_event, true);
 	if (E) {
@@ -161,7 +190,7 @@ void InputMap::action_erase_event(const StringName &p_action, const Ref<InputEve
 }
 
 void InputMap::action_erase_events(const StringName &p_action) {
-	ERR_FAIL_COND_MSG(!input_map.has(p_action), "Request for nonexistent InputMap action '" + String(p_action) + "'.");
+	ERR_FAIL_COND_MSG(!input_map.has(p_action), _suggest_actions(p_action));
 
 	input_map[p_action].inputs.clear();
 }
@@ -193,7 +222,7 @@ bool InputMap::event_is_action(const Ref<InputEvent> &p_event, const StringName
 
 bool InputMap::event_get_action_status(const Ref<InputEvent> &p_event, const StringName &p_action, bool p_exact_match, bool *p_pressed, float *p_strength, float *p_raw_strength) const {
 	OrderedHashMap<StringName, Action>::Element E = input_map.find(p_action);
-	ERR_FAIL_COND_V_MSG(!E, false, "Request for nonexistent InputMap action '" + String(p_action) + "'.");
+	ERR_FAIL_COND_V_MSG(!E, false, _suggest_actions(p_action));
 
 	Ref<InputEventAction> input_event_action = p_event;
 	if (input_event_action.is_valid()) {
diff --git a/core/input/input_map.h b/core/input/input_map.h
index 99c71e1e53..0e0567464a 100644
--- a/core/input/input_map.h
+++ b/core/input/input_map.h
@@ -61,6 +61,7 @@ private:
 
 	Array _action_get_events(const StringName &p_action);
 	Array _get_actions();
+	String _suggest_actions(const StringName &p_action) const;
 
 protected:
 	static void _bind_methods();
diff --git a/core/io/file_access_pack.h b/core/io/file_access_pack.h
index 343adbe592..955108f455 100644
--- a/core/io/file_access_pack.h
+++ b/core/io/file_access_pack.h
@@ -36,6 +36,7 @@
 #include "core/string/print_string.h"
 #include "core/templates/list.h"
 #include "core/templates/map.h"
+#include "core/templates/set.h"
 
 // Godot's packed file magic header ("GDPC" in ASCII).
 #define PACK_HEADER_MAGIC 0x43504447
diff --git a/core/io/resource_importer.cpp b/core/io/resource_importer.cpp
index 5ca0eb884a..b503655edd 100644
--- a/core/io/resource_importer.cpp
+++ b/core/io/resource_importer.cpp
@@ -192,6 +192,34 @@ bool ResourceFormatImporter::recognize_path(const String &p_path, const String &
 	return FileAccess::exists(p_path + ".import");
 }
 
+Error ResourceFormatImporter::get_import_order_threads_and_importer(const String &p_path, int &r_order, bool &r_can_threads, String &r_importer) const {
+	r_order = 0; // Reset all outputs first so they hold defaults when no importer is found.
+	r_importer = "";
+
+	r_can_threads = false;
+	Ref<ResourceImporter> importer;
+
+	if (FileAccess::exists(p_path + ".import")) { // A ".import" file exists: use the importer named by _get_path_and_type().
+		PathAndType pat;
+		Error err = _get_path_and_type(p_path, pat);
+
+		if (err == OK) {
+			importer = get_importer_by_name(pat.importer);
+		}
+	} else { // No ".import" file: look the importer up by the lowercased file extension.
+		importer = get_importer_by_extension(p_path.get_extension().to_lower());
+	}
+
+	if (importer.is_valid()) {
+		r_order = importer->get_import_order();
+		r_importer = importer->get_importer_name();
+		r_can_threads = importer->can_import_threaded();
+		return OK;
+	} else {
+		return ERR_INVALID_PARAMETER; // No valid importer could be resolved for p_path.
+	}
+}
+
 int ResourceFormatImporter::get_import_order(const String &p_path) const {
 	Ref<ResourceImporter> importer;
 
diff --git a/core/io/resource_importer.h b/core/io/resource_importer.h
index eeb486073e..a14d6ba52c 100644
--- a/core/io/resource_importer.h
+++ b/core/io/resource_importer.h
@@ -72,6 +72,8 @@ public:
 
 	virtual int get_import_order(const String &p_path) const;
 
+	Error get_import_order_threads_and_importer(const String &p_path, int &r_order, bool &r_can_threads, String &r_importer) const;
+
 	String get_internal_resource_path(const String &p_path) const;
 	void get_internal_resource_path_list(const String &p_path, List<String> *r_paths);
 
@@ -126,6 +128,9 @@ public:
 	virtual String get_option_group_file() const { return String(); }
 
 	virtual Error import(const String &p_source_file, const String &p_save_path, const Map<StringName, Variant> &p_options, List<String> *r_platform_variants, List<String> *r_gen_files = nullptr, Variant *r_metadata = nullptr) = 0;
+	virtual bool can_import_threaded() const { return true; }
+	virtual void import_threaded_begin() {}
+	virtual void import_threaded_end() {}
 
 	virtual Error import_group_file(const String &p_group_file, const Map<String, Map<StringName, Variant>> &p_source_file_options, const Map<String, String> &p_base_paths) { return ERR_UNAVAILABLE; }
 	virtual bool are_import_settings_valid(const String &p_path) const { return true; }
diff --git a/core/math/color.h b/core/math/color.h
index 5eb8b1119a..e404d80c8a 100644
--- a/core/math/color.h
+++ b/core/math/color.h
@@ -197,13 +197,13 @@ struct Color {
 
 	// For the binder.
 	_FORCE_INLINE_ void set_r8(int32_t r8) { r = (CLAMP(r8, 0, 255) / 255.0); }
-	_FORCE_INLINE_ int32_t get_r8() const { return int32_t(CLAMP(r * 255.0, 0.0, 255.0)); }
+	_FORCE_INLINE_ int32_t get_r8() const { return int32_t(CLAMP(Math::round(r * 255.0f), 0.0f, 255.0f)); }
 	_FORCE_INLINE_ void set_g8(int32_t g8) { g = (CLAMP(g8, 0, 255) / 255.0); }
-	_FORCE_INLINE_ int32_t get_g8() const { return int32_t(CLAMP(g * 255.0, 0.0, 255.0)); }
+	_FORCE_INLINE_ int32_t get_g8() const { return int32_t(CLAMP(Math::round(g * 255.0f), 0.0f, 255.0f)); }
 	_FORCE_INLINE_ void set_b8(int32_t b8) { b = (CLAMP(b8, 0, 255) / 255.0); }
-	_FORCE_INLINE_ int32_t get_b8() const { return int32_t(CLAMP(b * 255.0, 0.0, 255.0)); }
+	_FORCE_INLINE_ int32_t get_b8() const { return int32_t(CLAMP(Math::round(b * 255.0f), 0.0f, 255.0f)); }
 	_FORCE_INLINE_ void set_a8(int32_t a8) { a = (CLAMP(a8, 0, 255) / 255.0); }
-	_FORCE_INLINE_ int32_t get_a8() const { return int32_t(CLAMP(a * 255.0, 0.0, 255.0)); }
+	_FORCE_INLINE_ int32_t get_a8() const { return int32_t(CLAMP(Math::round(a * 255.0f), 0.0f, 255.0f)); }
 
 	_FORCE_INLINE_ void set_h(float p_h) { set_hsv(p_h, get_s(), get_v()); }
 	_FORCE_INLINE_ void set_s(float p_s) { set_hsv(get_h(), p_s, get_v()); }
diff --git a/core/templates/map.h b/core/templates/map.h
index 51a237472d..7dfee13d2c 100644
--- a/core/templates/map.h
+++ b/core/templates/map.h
@@ -32,7 +32,7 @@
 #define MAP_H
 
 #include "core/error/error_macros.h"
-#include "core/templates/set.h"
+#include "core/os/memory.h"
 
 // based on the very nice implementation of rb-trees by:
 // https://web.archive.org/web/20120507164830/http://web.mit.edu/~emin/www/source_code/red_black_tree/index.html
diff --git a/core/templates/thread_work_pool.h b/core/templates/thread_work_pool.h
index 19ab1dda3a..9f7a692cc5 100644
--- a/core/templates/thread_work_pool.h
+++ b/core/templates/thread_work_pool.h
@@ -83,7 +83,7 @@ public:
 		ERR_FAIL_COND(!threads); //never initialized
 		ERR_FAIL_COND(current_work != nullptr);
 
-		index.store(0);
+		index.store(0, std::memory_order_release);
 
 		Work<C, M, U> *w = memnew((Work<C, M, U>));
 		w->instance = p_instance;
@@ -104,8 +104,15 @@ public:
 		return current_work != nullptr;
 	}
 
+	bool is_done_dispatching() const { // True once the shared work index has reached the current work's element count.
+		ERR_FAIL_COND_V(current_work == nullptr, false); // Only meaningful while work is in progress.
+		return index.load(std::memory_order_acquire) >= current_work->max_elements;
+	}
+
 	uint32_t get_work_index() const {
-		return index;
+		ERR_FAIL_COND_V(current_work == nullptr, 0);
+		uint32_t idx = index.load(std::memory_order_acquire);
+		return MIN(idx, current_work->max_elements);
 	}
 
 	void end_work() {
diff --git a/core/variant/variant_call.cpp b/core/variant/variant_call.cpp
index 61f3f7d82e..7f83e27dfe 100644
--- a/core/variant/variant_call.cpp
+++ b/core/variant/variant_call.cpp
@@ -143,36 +143,6 @@ static _FORCE_INLINE_ void vc_ptrcall(void (T::*method)(P...) const, void *p_bas
 }
 
 template <class R, class T, class... P>
-static _FORCE_INLINE_ void vc_change_return_type(R (T::*method)(P...), Variant *v) {
-	VariantTypeAdjust<R>::adjust(v);
-}
-
-template <class R, class T, class... P>
-static _FORCE_INLINE_ void vc_change_return_type(R (T::*method)(P...) const, Variant *v) {
-	VariantTypeAdjust<R>::adjust(v);
-}
-
-template <class T, class... P>
-static _FORCE_INLINE_ void vc_change_return_type(void (T::*method)(P...), Variant *v) {
-	VariantInternal::clear(v);
-}
-
-template <class T, class... P>
-static _FORCE_INLINE_ void vc_change_return_type(void (T::*method)(P...) const, Variant *v) {
-	VariantInternal::clear(v);
-}
-
-template <class R, class... P>
-static _FORCE_INLINE_ void vc_change_return_type(R (*method)(P...), Variant *v) {
-	VariantTypeAdjust<R>::adjust(v);
-}
-
-template <class... P>
-static _FORCE_INLINE_ void vc_change_return_type(void (*method)(P...), Variant *v) {
-	VariantInternal::clear(v);
-}
-
-template <class R, class T, class... P>
 static _FORCE_INLINE_ int vc_get_argument_count(R (T::*method)(P...)) {
 	return sizeof...(P);
 }
@@ -333,7 +303,6 @@ static _FORCE_INLINE_ Variant::Type vc_get_base_type(void (T::*method)(P...) con
 			vc_method_call(m_method_ptr, base, p_args, p_argcount, r_ret, p_defvals, r_error);                                                                    \
 		}                                                                                                                                                         \
 		static void validated_call(Variant *base, const Variant **p_args, int p_argcount, Variant *r_ret) {                                                       \
-			vc_change_return_type(m_method_ptr, r_ret);                                                                                                           \
 			vc_validated_call(m_method_ptr, base, p_args, r_ret);                                                                                                 \
 		}                                                                                                                                                         \
 		static void ptrcall(void *p_base, const void **p_args, void *r_ret, int p_argcount) {                                                                     \
@@ -384,7 +353,6 @@ static _FORCE_INLINE_ void vc_static_ptrcall(void (*method)(P...), const void **
 			vc_static_method_call(m_method_ptr, p_args, p_argcount, r_ret, p_defvals, r_error);                                                                   \
 		}                                                                                                                                                         \
 		static void validated_call(Variant *base, const Variant **p_args, int p_argcount, Variant *r_ret) {                                                       \
-			vc_change_return_type(m_method_ptr, r_ret);                                                                                                           \
 			vc_validated_static_call(m_method_ptr, p_args, r_ret);                                                                                                \
 		}                                                                                                                                                         \
 		static void ptrcall(void *p_base, const void **p_args, void *r_ret, int p_argcount) {                                                                     \
@@ -435,7 +403,6 @@ static _FORCE_INLINE_ void vc_ptrcall(void (*method)(T *, P...), void *p_base, c
 			vc_method_call_static(m_method_ptr, base, p_args, p_argcount, r_ret, p_defvals, r_error);                                                             \
 		}                                                                                                                                                         \
 		static void validated_call(Variant *base, const Variant **p_args, int p_argcount, Variant *r_ret) {                                                       \
-			vc_change_return_type(m_method_ptr, r_ret);                                                                                                           \
 			vc_validated_call_static(m_method_ptr, base, p_args, r_ret);                                                                                          \
 		}                                                                                                                                                         \
 		static void ptrcall(void *p_base, const void **p_args, void *r_ret, int p_argcount) {                                                                     \
diff --git a/core/variant/variant_setget.cpp b/core/variant/variant_setget.cpp
index f319631ce5..9ab8602782 100644
--- a/core/variant/variant_setget.cpp
+++ b/core/variant/variant_setget.cpp
@@ -1045,6 +1045,7 @@ void register_indexed_setters_getters() {
 	REGISTER_INDEXED_MEMBER(PackedByteArray);
 	REGISTER_INDEXED_MEMBER(PackedInt32Array);
 	REGISTER_INDEXED_MEMBER(PackedInt64Array);
+	REGISTER_INDEXED_MEMBER(PackedFloat32Array);
 	REGISTER_INDEXED_MEMBER(PackedFloat64Array);
 	REGISTER_INDEXED_MEMBER(PackedVector2Array);
 	REGISTER_INDEXED_MEMBER(PackedVector3Array);
diff --git a/doc/classes/AStar.xml b/doc/classes/AStar.xml
index e975b8ed28..533ecbd279 100644
--- a/doc/classes/AStar.xml
+++ b/doc/classes/AStar.xml
@@ -289,6 +289,7 @@
 			</argument>
 			<description>
 				Returns an array with the points that are in the path found by AStar between the given points. The array is ordered from the starting point to the ending point of the path.
+				[b]Note:[/b] This method is not thread-safe. If called from a [Thread], it will return an empty [PackedVector3Array] and will print an error message.
 			</description>
 		</method>
 		<method name="get_point_position" qualifiers="const">
diff --git a/doc/classes/AStar2D.xml b/doc/classes/AStar2D.xml
index 2a51678209..3efd2f604c 100644
--- a/doc/classes/AStar2D.xml
+++ b/doc/classes/AStar2D.xml
@@ -258,6 +258,7 @@
 			</argument>
 			<description>
 				Returns an array with the points that are in the path found by AStar2D between the given points. The array is ordered from the starting point to the ending point of the path.
+				[b]Note:[/b] This method is not thread-safe. If called from a [Thread], it will return an empty [PackedVector2Array] and will print an error message.
 			</description>
 		</method>
 		<method name="get_point_position" qualifiers="const">
diff --git a/doc/classes/AnimationNodeTimeSeek.xml b/doc/classes/AnimationNodeTimeSeek.xml
index eb5335c792..171d65fbe0 100644
--- a/doc/classes/AnimationNodeTimeSeek.xml
+++ b/doc/classes/AnimationNodeTimeSeek.xml
@@ -4,7 +4,27 @@
 		A time-seeking animation node to be used with [AnimationTree].
 	</brief_description>
 	<description>
-		This node can be used to cause a seek command to happen to any sub-children of the graph. After setting the time, this value returns to -1.
+		This node can be used to cause a seek command to happen to any sub-children of the animation graph. Use this node type to play an [Animation] from the start or a certain playback position inside the [AnimationNodeBlendTree]. After setting the time and changing the animation playback, the seek node automatically goes into sleep mode on the next process frame by setting its [code]seek_position[/code] value to [code]-1.0[/code].
+		[codeblocks]
+		[gdscript]
+		# Play child animation from the start.
+		animation_tree.set("parameters/Seek/seek_position", 0.0)
+		# Alternative syntax (same result as above).
+		animation_tree["parameters/Seek/seek_position"] = 0.0
+
+		# Play child animation from 12 second timestamp.
+		animation_tree.set("parameters/Seek/seek_position", 12.0)
+		# Alternative syntax (same result as above).
+		animation_tree["parameters/Seek/seek_position"] = 12.0
+		[/gdscript]
+		[csharp]
+		// Play child animation from the start.
+		animationTree.Set("parameters/Seek/seek_position", 0.0);
+
+		// Play child animation from 12 second timestamp.
+		animationTree.Set("parameters/Seek/seek_position", 12.0);
+		[/csharp]
+		[/codeblocks]
 	</description>
 	<tutorials>
 		<link title="AnimationTree">https://docs.godotengine.org/en/latest/tutorials/animation/animation_tree.html</link>
diff --git a/doc/classes/Area2D.xml b/doc/classes/Area2D.xml
index 9711a2a35b..ed11d26271 100644
--- a/doc/classes/Area2D.xml
+++ b/doc/classes/Area2D.xml
@@ -1,10 +1,10 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="Area2D" inherits="CollisionObject2D" version="4.0">
 	<brief_description>
-		2D area for detection and 2D physics influence.
+		2D area for detection and physics and audio influence.
 	</brief_description>
 	<description>
-		2D area that detects [CollisionObject2D] nodes overlapping, entering, or exiting. Can also alter or override local physics parameters (gravity, damping).
+		2D area that detects [CollisionObject2D] nodes overlapping, entering, or exiting. Can also alter or override local physics parameters (gravity, damping) and route audio to a custom audio bus.
 	</description>
 	<tutorials>
 		<link title="Using Area2D">https://docs.godotengine.org/en/latest/tutorials/physics/using_area_2d.html</link>
diff --git a/doc/classes/Area3D.xml b/doc/classes/Area3D.xml
index 4271769155..206a2a61b4 100644
--- a/doc/classes/Area3D.xml
+++ b/doc/classes/Area3D.xml
@@ -1,10 +1,10 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="Area3D" inherits="CollisionObject3D" version="4.0">
 	<brief_description>
-		General-purpose area node for detection and 3D physics influence.
+		3D area for detection and physics and audio influence.
 	</brief_description>
 	<description>
-		3D area that detects [CollisionObject3D] nodes overlapping, entering, or exiting. Can also alter or override local physics parameters (gravity, damping).
+		3D area that detects [CollisionObject3D] nodes overlapping, entering, or exiting. Can also alter or override local physics parameters (gravity, damping) and route audio to custom audio buses.
 	</description>
 	<tutorials>
 		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
diff --git a/doc/classes/CapsuleMesh.xml b/doc/classes/CapsuleMesh.xml
index fab11d44cc..031abd0112 100644
--- a/doc/classes/CapsuleMesh.xml
+++ b/doc/classes/CapsuleMesh.xml
@@ -12,7 +12,8 @@
 	</methods>
 	<members>
 		<member name="mid_height" type="float" setter="set_mid_height" getter="get_mid_height" default="1.0">
-			Height of the capsule mesh from the center point.
+			Height of the middle cylindrical part of the capsule (without the hemispherical ends).
+			[b]Note:[/b] The capsule's total height is equal to [member mid_height] + 2 * [member radius].
 		</member>
 		<member name="radial_segments" type="int" setter="set_radial_segments" getter="get_radial_segments" default="64">
 			Number of radial segments on the capsule mesh.
diff --git a/doc/classes/ColorPicker.xml b/doc/classes/ColorPicker.xml
index 83223bb645..fddfd27573 100644
--- a/doc/classes/ColorPicker.xml
+++ b/doc/classes/ColorPicker.xml
@@ -51,7 +51,7 @@
 			If [code]true[/code], allows editing the color with Hue/Saturation/Value sliders.
 			[b]Note:[/b] Cannot be enabled if raw mode is on.
 		</member>
-		<member name="picker_shape" type="int" setter="set_picker_shape" getter="get_picker_shape" default="0">
+		<member name="picker_shape" type="int" setter="set_picker_shape" getter="get_picker_shape" enum="ColorPicker.PickerShapeType" default="0">
 			The shape of the color space view. See [enum PickerShapeType].
 		</member>
 		<member name="presets_enabled" type="bool" setter="set_presets_enabled" getter="are_presets_enabled" default="true">
@@ -122,6 +122,8 @@
 		<theme_item name="overbright_indicator" type="Texture2D">
 			The indicator used to signalize that the color value is outside the 0-1 range.
 		</theme_item>
+		<theme_item name="picker_cursor" type="Texture2D">
+		</theme_item>
 		<theme_item name="preset_bg" type="Texture2D">
 		</theme_item>
 		<theme_item name="screen_picker" type="Texture2D">
diff --git a/doc/classes/Geometry2D.xml b/doc/classes/Geometry2D.xml
index 2c0d9b54d1..13354ec19e 100644
--- a/doc/classes/Geometry2D.xml
+++ b/doc/classes/Geometry2D.xml
@@ -184,7 +184,7 @@
 			</argument>
 			<description>
 				Merges (combines) [code]polygon_a[/code] and [code]polygon_b[/code] and returns an array of merged polygons. This performs [constant OPERATION_UNION] between polygons.
-				The operation may result in an outer polygon (boundary) and inner polygon (hole) produced which could be distinguished by calling [method is_polygon_clockwise].
+				The operation may result in an outer polygon (boundary) and multiple inner polygons (holes) produced which could be distinguished by calling [method is_polygon_clockwise].
 			</description>
 		</method>
 		<method name="offset_polygon">
diff --git a/doc/classes/HeightMapShape3D.xml b/doc/classes/HeightMapShape3D.xml
index 6d230bdab8..f6f2a27891 100644
--- a/doc/classes/HeightMapShape3D.xml
+++ b/doc/classes/HeightMapShape3D.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="HeightMapShape3D" inherits="Shape3D" version="4.0">
 	<brief_description>
-		Height map shape for 3D physics (Bullet only).
+		Height map shape for 3D physics.
 	</brief_description>
 	<description>
 		Height map shape resource, which can be added to a [PhysicsBody3D] or [Area3D].
diff --git a/doc/classes/Image.xml b/doc/classes/Image.xml
index 9d87c9bf9a..91a07f66e0 100644
--- a/doc/classes/Image.xml
+++ b/doc/classes/Image.xml
@@ -186,7 +186,8 @@
 			<return type="int" enum="Error">
 			</return>
 			<description>
-				Decompresses the image if it is compressed. Returns an error if decompress function is not available.
+				Decompresses the image if it is VRAM compressed in a supported format. Returns [constant OK] if the format is supported, otherwise [constant ERR_UNAVAILABLE].
+				[b]Note:[/b] The following formats can be decompressed: DXT, RGTC, BPTC, PVRTC1. The formats ETC1 and ETC2 are not supported.
 			</description>
 		</method>
 		<method name="detect_alpha" qualifiers="const">
diff --git a/doc/classes/LineEdit.xml b/doc/classes/LineEdit.xml
index 360f5c451e..7adf19632e 100644
--- a/doc/classes/LineEdit.xml
+++ b/doc/classes/LineEdit.xml
@@ -12,34 +12,25 @@
 		- [kbd]Ctrl + Z[/kbd]: Undo
 		- [kbd]Ctrl + ~[/kbd]: Swap input direction.
 		- [kbd]Ctrl + Shift + Z[/kbd]: Redo
-		- [kbd]Ctrl + U[/kbd]: Delete text from the cursor position to the beginning of the line
-		- [kbd]Ctrl + K[/kbd]: Delete text from the cursor position to the end of the line
+		- [kbd]Ctrl + U[/kbd]: Delete text from the caret position to the beginning of the line
+		- [kbd]Ctrl + K[/kbd]: Delete text from the caret position to the end of the line
 		- [kbd]Ctrl + A[/kbd]: Select all text
-		- [kbd]Up Arrow[/kbd]/[kbd]Down Arrow[/kbd]: Move the cursor to the beginning/end of the line
+		- [kbd]Up Arrow[/kbd]/[kbd]Down Arrow[/kbd]: Move the caret to the beginning/end of the line
 		On macOS, some extra keyboard shortcuts are available:
-		- [kbd]Ctrl + F[/kbd]: Same as [kbd]Right Arrow[/kbd], move the cursor one character right
-		- [kbd]Ctrl + B[/kbd]: Same as [kbd]Left Arrow[/kbd], move the cursor one character left
-		- [kbd]Ctrl + P[/kbd]: Same as [kbd]Up Arrow[/kbd], move the cursor to the previous line
-		- [kbd]Ctrl + N[/kbd]: Same as [kbd]Down Arrow[/kbd], move the cursor to the next line
-		- [kbd]Ctrl + D[/kbd]: Same as [kbd]Delete[/kbd], delete the character on the right side of cursor
-		- [kbd]Ctrl + H[/kbd]: Same as [kbd]Backspace[/kbd], delete the character on the left side of the cursor
-		- [kbd]Ctrl + A[/kbd]: Same as [kbd]Home[/kbd], move the cursor to the beginning of the line
-		- [kbd]Ctrl + E[/kbd]: Same as [kbd]End[/kbd], move the cursor to the end of the line
-		- [kbd]Cmd + Left Arrow[/kbd]: Same as [kbd]Home[/kbd], move the cursor to the beginning of the line
-		- [kbd]Cmd + Right Arrow[/kbd]: Same as [kbd]End[/kbd], move the cursor to the end of the line
+		- [kbd]Ctrl + F[/kbd]: Same as [kbd]Right Arrow[/kbd], move the caret one character right
+		- [kbd]Ctrl + B[/kbd]: Same as [kbd]Left Arrow[/kbd], move the caret one character left
+		- [kbd]Ctrl + P[/kbd]: Same as [kbd]Up Arrow[/kbd], move the caret to the previous line
+		- [kbd]Ctrl + N[/kbd]: Same as [kbd]Down Arrow[/kbd], move the caret to the next line
+		- [kbd]Ctrl + D[/kbd]: Same as [kbd]Delete[/kbd], delete the character on the right side of the caret
+		- [kbd]Ctrl + H[/kbd]: Same as [kbd]Backspace[/kbd], delete the character on the left side of the caret
+		- [kbd]Ctrl + A[/kbd]: Same as [kbd]Home[/kbd], move the caret to the beginning of the line
+		- [kbd]Ctrl + E[/kbd]: Same as [kbd]End[/kbd], move the caret to the end of the line
+		- [kbd]Cmd + Left Arrow[/kbd]: Same as [kbd]Home[/kbd], move the caret to the beginning of the line
+		- [kbd]Cmd + Right Arrow[/kbd]: Same as [kbd]End[/kbd], move the caret to the end of the line
 	</description>
 	<tutorials>
 	</tutorials>
 	<methods>
-		<method name="append_at_cursor">
-			<return type="void">
-			</return>
-			<argument index="0" name="text" type="String">
-			</argument>
-			<description>
-				Adds [code]text[/code] after the cursor. If the resulting value is longer than [member max_length], nothing happens.
-			</description>
-		</method>
 		<method name="clear">
 			<return type="void">
 			</return>
@@ -54,11 +45,11 @@
 				Removes all OpenType features.
 			</description>
 		</method>
-		<method name="delete_char_at_cursor">
+		<method name="delete_char_at_caret">
 			<return type="void">
 			</return>
 			<description>
-				Deletes one character at the cursor's current position (equivalent to pressing [kbd]Delete[/kbd]).
+				Deletes one character at the caret's current position (equivalent to pressing [kbd]Delete[/kbd]).
 			</description>
 		</method>
 		<method name="delete_text">
@@ -99,7 +90,16 @@
 			<return type="int">
 			</return>
 			<description>
-				Returns the scroll offset due to [member caret_position], as a number of characters.
+				Returns the scroll offset due to [member caret_column], as a number of characters.
+			</description>
+		</method>
+		<method name="insert_text_at_caret">
+			<return type="void">
+			</return>
+			<argument index="0" name="text" type="String">
+			</argument>
+			<description>
+				Inserts [code]text[/code] at the caret. If the resulting value is longer than [member max_length], nothing happens.
 			</description>
 		</method>
 		<method name="menu_option">
@@ -159,21 +159,21 @@
 		<member name="align" type="int" setter="set_align" getter="get_align" enum="LineEdit.Align" default="0">
 			Text alignment as defined in the [enum Align] enum.
 		</member>
-		<member name="caret_blink" type="bool" setter="cursor_set_blink_enabled" getter="cursor_get_blink_enabled" default="false">
-			If [code]true[/code], the caret (visual cursor) blinks.
+		<member name="caret_blink" type="bool" setter="set_caret_blink_enabled" getter="is_caret_blink_enabled" default="false">
+			If [code]true[/code], the caret (text cursor) blinks.
 		</member>
-		<member name="caret_blink_speed" type="float" setter="cursor_set_blink_speed" getter="cursor_get_blink_speed" default="0.65">
+		<member name="caret_blink_speed" type="float" setter="set_caret_blink_speed" getter="get_caret_blink_speed" default="0.65">
 			Duration (in seconds) of a caret's blinking cycle.
 		</member>
-		<member name="caret_force_displayed" type="bool" setter="cursor_set_force_displayed" getter="cursor_get_force_displayed" default="false">
+		<member name="caret_column" type="int" setter="set_caret_column" getter="get_caret_column" default="0">
+			The caret's column position inside the [LineEdit]. When set, the text may scroll to accommodate it.
 		</member>
-		<member name="caret_mid_grapheme" type="bool" setter="set_mid_grapheme_caret_enabled" getter="get_mid_grapheme_caret_enabled" default="false">
+		<member name="caret_force_displayed" type="bool" setter="set_caret_force_displayed" getter="is_caret_force_displayed" default="false">
+		</member>
+		<member name="caret_mid_grapheme" type="bool" setter="set_caret_mid_grapheme_enabled" getter="is_caret_mid_grapheme_enabled" default="false">
 			Allow moving caret, selecting and removing the individual composite character components.
 			Note: [kbd]Backspace[/kbd] is always removing individual composite character components.
 		</member>
-		<member name="caret_position" type="int" setter="set_cursor_position" getter="get_cursor_position" default="0">
-			The cursor's position inside the [LineEdit]. When set, the text may scroll to accommodate it.
-		</member>
 		<member name="clear_button_enabled" type="bool" setter="set_clear_button_enabled" getter="is_clear_button_enabled" default="false">
 			If [code]true[/code], the [LineEdit] will show a clear button if [code]text[/code] is not empty, which can be used to clear the text quickly.
 		</member>
@@ -186,7 +186,7 @@
 		<member name="editable" type="bool" setter="set_editable" getter="is_editable" default="true">
 			If [code]false[/code], existing text cannot be modified and new text cannot be added.
 		</member>
-		<member name="expand_to_text_length" type="bool" setter="set_expand_to_text_length" getter="get_expand_to_text_length" default="false">
+		<member name="expand_to_text_length" type="bool" setter="set_expand_to_text_length_enabled" getter="is_expand_to_text_length_enabled" default="false">
 			If [code]true[/code], the [LineEdit] width will increase to stay longer than the [member text]. It will [b]not[/b] compress if the [member text] is shortened.
 		</member>
 		<member name="focus_mode" type="int" setter="set_focus_mode" getter="get_focus_mode" override="true" enum="Control.FocusMode" default="2" />
@@ -276,7 +276,7 @@
 			Copies the selected text.
 		</constant>
 		<constant name="MENU_PASTE" value="2" enum="MenuItems">
-			Pastes the clipboard text over the selected text (or at the cursor's position).
+			Pastes the clipboard text over the selected text (or at the caret's position).
 			Non-printable escape characters are automatically stripped from the OS clipboard via [method String.strip_escapes].
 		</constant>
 		<constant name="MENU_CLEAR" value="3" enum="MenuItems">
@@ -359,6 +359,9 @@
 		</constant>
 	</constants>
 	<theme_items>
+		<theme_item name="caret_color" type="Color" default="Color( 0.94, 0.94, 0.94, 1 )">
+			Color of the [LineEdit]'s caret (text cursor).
+		</theme_item>
 		<theme_item name="clear" type="Texture2D">
 			Texture for the clear button. See [member clear_button_enabled].
 		</theme_item>
@@ -368,9 +371,6 @@
 		<theme_item name="clear_button_color_pressed" type="Color" default="Color( 1, 1, 1, 1 )">
 			Color used for the clear button when it's pressed.
 		</theme_item>
-		<theme_item name="cursor_color" type="Color" default="Color( 0.94, 0.94, 0.94, 1 )">
-			Color of the [LineEdit]'s visual cursor (caret).
-		</theme_item>
 		<theme_item name="focus" type="StyleBox">
 			Background used when [LineEdit] has GUI focus.
 		</theme_item>
diff --git a/doc/classes/MeshInstance3D.xml b/doc/classes/MeshInstance3D.xml
index 82cd392cd3..e1a6cf44a7 100644
--- a/doc/classes/MeshInstance3D.xml
+++ b/doc/classes/MeshInstance3D.xml
@@ -43,7 +43,7 @@
 				Returns the [Material] that will be used by the [Mesh] when drawing. This can return the [member GeometryInstance3D.material_override], the surface override [Material] defined in this [MeshInstance3D], or the surface [Material] defined in the [Mesh]. For example, if [member GeometryInstance3D.material_override] is used, all surfaces will return the override material.
 			</description>
 		</method>
-		<method name="get_surface_material" qualifiers="const">
+		<method name="get_surface_override_material" qualifiers="const">
 			<return type="Material">
 			</return>
 			<argument index="0" name="surface" type="int">
@@ -52,14 +52,14 @@
 				Returns the override [Material] for the specified surface of the [Mesh] resource.
 			</description>
 		</method>
-		<method name="get_surface_material_count" qualifiers="const">
+		<method name="get_surface_override_material_count" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
-				Returns the number of surface materials.
+				Returns the number of surface override materials. This is equivalent to [method Mesh.get_surface_count].
 			</description>
 		</method>
-		<method name="set_surface_material">
+		<method name="set_surface_override_material">
 			<return type="void">
 			</return>
 			<argument index="0" name="surface" type="int">
diff --git a/doc/classes/Node.xml b/doc/classes/Node.xml
index b5335e47cd..523f3a0c17 100644
--- a/doc/classes/Node.xml
+++ b/doc/classes/Node.xml
@@ -38,7 +38,7 @@
 			</description>
 		</method>
 		<method name="_get_configuration_warnings" qualifiers="virtual">
-			<return type="Array">
+			<return type="String[]">
 			</return>
 			<description>
 				The elements in the array returned from this method are displayed as warnings in the Scene Dock if the script that overrides it is a [code]tool[/code] script.
diff --git a/doc/classes/PackedByteArray.xml b/doc/classes/PackedByteArray.xml
index 21f835a53c..24178c3ff6 100644
--- a/doc/classes/PackedByteArray.xml
+++ b/doc/classes/PackedByteArray.xml
@@ -61,6 +61,114 @@
 				Returns a new [PackedByteArray] with the data compressed. Set the compression mode using one of [enum File.CompressionMode]'s constants.
 			</description>
 		</method>
+		<method name="decode_double" qualifiers="const">
+			<return type="float">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_float" qualifiers="const">
+			<return type="float">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_half" qualifiers="const">
+			<return type="float">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_s16" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_s32" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_s64" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_s8" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_u16" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_u32" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_u64" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_u8" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_var" qualifiers="const">
+			<return type="Variant">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="allow_objects" type="bool" default="false">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="decode_var_size" qualifiers="const">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="allow_objects" type="bool" default="false">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="decompress" qualifiers="const">
 			<return type="PackedByteArray">
 			</return>
@@ -92,6 +200,128 @@
 				Creates a copy of the array, and returns it.
 			</description>
 		</method>
+		<method name="encode_double">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="float">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_float">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="float">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_half">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="float">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_s16">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_s32">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_s64">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_s8">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_u16">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_u32">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_u64">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_u8">
+			<return type="void">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="encode_var">
+			<return type="int">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="value" type="Variant">
+			</argument>
+			<argument index="2" name="allow_objects" type="bool" default="false">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="get_string_from_ascii" qualifiers="const">
 			<return type="String">
 			</return>
@@ -129,6 +359,16 @@
 				Returns [code]true[/code] if the array contains [code]value[/code].
 			</description>
 		</method>
+		<method name="has_encoded_var" qualifiers="const">
+			<return type="bool">
+			</return>
+			<argument index="0" name="byte_offset" type="int">
+			</argument>
+			<argument index="1" name="allow_objects" type="bool" default="false">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="hex_encode" qualifiers="const">
 			<return type="String">
 			</return>
diff --git a/doc/classes/PackedFloat32Array.xml b/doc/classes/PackedFloat32Array.xml
index 6be1d24b5d..5e0008852c 100644
--- a/doc/classes/PackedFloat32Array.xml
+++ b/doc/classes/PackedFloat32Array.xml
@@ -111,6 +111,14 @@
 			<description>
 			</description>
 		</method>
+		<method name="operator []" qualifiers="operator">
+			<return type="float">
+			</return>
+			<argument index="0" name="index" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="push_back">
 			<return type="bool">
 			</return>
diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml
index 5b9150ab04..c8fd0d667b 100644
--- a/doc/classes/ProjectSettings.xml
+++ b/doc/classes/ProjectSettings.xml
@@ -6,7 +6,7 @@
 	<description>
 		Contains global variables accessible from everywhere. Use [method get_setting], [method set_setting] or [method has_setting] to access them. Variables stored in [code]project.godot[/code] are also loaded into ProjectSettings, making this object very useful for reading custom game configuration options.
 		When naming a Project Settings property, use the full path to the setting including the category. For example, [code]"application/config/name"[/code] for the project name. Category and property names can be viewed in the Project Settings dialog.
-		[b]Overriding:[/b] Any project setting can be overridden by creating a file named [code]override.cfg[/code] in the project's root directory. This can also be used in exported projects by placing this file in the same directory as the project binary.
+		[b]Overriding:[/b] Any project setting can be overridden by creating a file named [code]override.cfg[/code] in the project's root directory. This can also be used in exported projects by placing this file in the same directory as the project binary. Overriding will still take the base project settings' [url=https://docs.godotengine.org/en/latest/tutorials/export/feature_tags.html]feature tags[/url] into account. Therefore, make sure to [i]also[/i] override the setting with the desired feature tags if you want them to override base project settings on all platforms and configurations.
 	</description>
 	<tutorials>
 		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
@@ -255,8 +255,8 @@
 			[b]Note:[/b] Changing this value will also change the user data folder's path if [member application/config/use_custom_user_dir] is [code]false[/code]. After renaming the project, you will no longer be able to access existing data in [code]user://[/code] unless you rename the old folder to match the new project name. See [url=https://docs.godotengine.org/en/latest/tutorials/io/data_paths.html]Data paths[/url] in the documentation for more information.
 		</member>
 		<member name="application/config/project_settings_override" type="String" setter="" getter="" default="&quot;&quot;">
-			Specifies a file to override project settings. For example: [code]user://custom_settings.cfg[/code].
-			[b]Note:[/b] Regardless of this setting's value, [code]res://override.cfg[/code] will still be read to override the project settings (see this class' description at the top).
+			Specifies a file to override project settings. For example: [code]user://custom_settings.cfg[/code]. See "Overriding" in the [ProjectSettings] class description at the top for more information.
+			[b]Note:[/b] Regardless of this setting's value, [code]res://override.cfg[/code] will still be read to override the project settings.
 		</member>
 		<member name="application/config/use_custom_user_dir" type="bool" setter="" getter="" default="false">
 			If [code]true[/code], the project will save user data to its own user directory (see [member application/config/custom_user_dir_name]). This setting is only effective on desktop platforms. A name must be set in the [member application/config/custom_user_dir_name] setting for this to take effect. If [code]false[/code], the project will save user data to [code](OS user data directory)/Godot/app_userdata/(project name)[/code].
@@ -1149,7 +1149,7 @@
 		<member name="navigation/3d/default_cell_size" type="float" setter="" getter="" default="0.3">
 			Default cell size for 3D navigation maps. See [method NavigationServer3D.map_set_cell_size].
 		</member>
-		<member name="navigation/3d/default_edge_connection_margin" type="float" setter="" getter="" default="5.0">
+		<member name="navigation/3d/default_edge_connection_margin" type="float" setter="" getter="" default="0.3">
 			Default edge connection margin for 3D navigation maps. See [method NavigationServer3D.map_set_edge_connection_margin].
 		</member>
 		<member name="network/limits/debugger/max_chars_per_second" type="int" setter="" getter="" default="32768">
diff --git a/doc/classes/RenderingServer.xml b/doc/classes/RenderingServer.xml
index f82301bcf4..d6eaa1b88b 100644
--- a/doc/classes/RenderingServer.xml
+++ b/doc/classes/RenderingServer.xml
@@ -1317,7 +1317,7 @@
 				Sets the scenario that the instance is in. The scenario is the 3D world that the objects will be displayed in.
 			</description>
 		</method>
-		<method name="instance_set_surface_material">
+		<method name="instance_set_surface_override_material">
 			<return type="void">
 			</return>
 			<argument index="0" name="instance" type="RID">
@@ -1327,7 +1327,7 @@
 			<argument index="2" name="material" type="RID">
 			</argument>
 			<description>
-				Sets the material of a specific surface. Equivalent to [method MeshInstance3D.set_surface_material].
+				Sets the override material of a specific surface. Equivalent to [method MeshInstance3D.set_surface_override_material].
 			</description>
 		</method>
 		<method name="instance_set_transform">
diff --git a/doc/classes/VideoPlayer.xml b/doc/classes/VideoPlayer.xml
index b2ab356b0d..d905ce4054 100644
--- a/doc/classes/VideoPlayer.xml
+++ b/doc/classes/VideoPlayer.xml
@@ -74,6 +74,7 @@
 		</member>
 		<member name="stream_position" type="float" setter="set_stream_position" getter="get_stream_position">
 			The current position of the stream, in seconds.
+			[b]Note:[/b] Changing this value won't have any effect as seeking is not implemented yet, except in video formats implemented by a GDNative add-on.
 		</member>
 		<member name="volume" type="float" setter="set_volume" getter="get_volume">
 			Audio volume as a linear value.
diff --git a/doc/classes/VisualShader.xml b/doc/classes/VisualShader.xml
index c29c30289a..ff00a848b9 100644
--- a/doc/classes/VisualShader.xml
+++ b/doc/classes/VisualShader.xml
@@ -228,7 +228,9 @@
 		</constant>
 		<constant name="TYPE_END" value="5" enum="Type">
 		</constant>
-		<constant name="TYPE_MAX" value="6" enum="Type">
+		<constant name="TYPE_SKY" value="6" enum="Type">
+		</constant>
+		<constant name="TYPE_MAX" value="7" enum="Type">
 			Represents the size of the [enum Type] enum.
 		</constant>
 		<constant name="NODE_ID_INVALID" value="-1">
diff --git a/doc/tools/makerst.py b/doc/tools/makerst.py
index ae3cc73098..1c6055f8ca 100755
--- a/doc/tools/makerst.py
+++ b/doc/tools/makerst.py
@@ -437,7 +437,7 @@ def make_rst_class(class_def, state, dry_run, output_dir):  # type: (ClassDef, S
         for property_def in class_def.properties.values():
             type_rst = property_def.type_name.to_rst(state)
             default = property_def.default_value
-            if property_def.overridden:
+            if default is not None and property_def.overridden:
                 ml.append((type_rst, property_def.name, default + " *(parent override)*"))
             else:
                 ref = ":ref:`{0}<class_{1}_property_{0}>`".format(property_def.name, class_name)
diff --git a/editor/animation_track_editor.cpp b/editor/animation_track_editor.cpp
index 4fe2d2bb2a..9db2f0a287 100644
--- a/editor/animation_track_editor.cpp
+++ b/editor/animation_track_editor.cpp
@@ -2734,7 +2734,7 @@ void AnimationTrackEdit::_gui_input(const Ref<InputEvent> &p_event) {
 		path_popup->set_size(path_rect.size);
 		path_popup->popup();
 		path->grab_focus();
-		path->set_cursor_position(path->get_text().length());
+		path->set_caret_column(path->get_text().length());
 		clicking_on_name = false;
 	}
 
diff --git a/editor/code_editor.cpp b/editor/code_editor.cpp
index ac8bef817b..1c62c3d3e1 100644
--- a/editor/code_editor.cpp
+++ b/editor/code_editor.cpp
@@ -142,7 +142,7 @@ bool FindReplaceBar::_search(uint32_t p_flags, int p_from_line, int p_from_col)
 	bool found = text_editor->search(text, p_flags, p_from_line, p_from_col, line, col);
 
 	if (found) {
-		if (!preserve_cursor) {
+		if (!preserve_cursor && !is_selection_only()) {
 			text_editor->unfold_line(line);
 			text_editor->cursor_set_line(line, false);
 			text_editor->cursor_set_column(col + text.length(), false);
@@ -488,10 +488,10 @@ void FindReplaceBar::_show_search(bool p_focus_replace, bool p_show_only) {
 	if (!get_search_text().is_empty()) {
 		if (p_focus_replace) {
 			replace_text->select_all();
-			replace_text->set_cursor_position(replace_text->get_text().length());
+			replace_text->set_caret_column(replace_text->get_text().length());
 		} else {
 			search_text->select_all();
-			search_text->set_cursor_position(search_text->get_text().length());
+			search_text->set_caret_column(search_text->get_text().length());
 		}
 
 		results_count = -1;
diff --git a/editor/debugger/script_editor_debugger.cpp b/editor/debugger/script_editor_debugger.cpp
index c92e94270e..1d95161e6c 100644
--- a/editor/debugger/script_editor_debugger.cpp
+++ b/editor/debugger/script_editor_debugger.cpp
@@ -35,6 +35,8 @@
 #include "core/debugger/remote_debugger.h"
 #include "core/io/marshalls.h"
 #include "core/string/ustring.h"
+#include "core/version.h"
+#include "core/version_hash.gen.h"
 #include "editor/debugger/editor_network_profiler.h"
 #include "editor/debugger/editor_performance_profiler.h"
 #include "editor/debugger/editor_profiler.h"
@@ -1371,7 +1373,8 @@ void ScriptEditorDebugger::_error_tree_item_rmb_selected(const Vector2 &p_pos) {
 	item_menu->set_size(Size2(1, 1));
 
 	if (error_tree->is_anything_selected()) {
-		item_menu->add_icon_item(get_theme_icon("ActionCopy", "EditorIcons"), TTR("Copy Error"), 0);
+		item_menu->add_icon_item(get_theme_icon("ActionCopy", "EditorIcons"), TTR("Copy Error"), ACTION_COPY_ERROR);
+		item_menu->add_icon_item(get_theme_icon("Instance", "EditorIcons"), TTR("Open C++ Source on GitHub"), ACTION_OPEN_SOURCE);
 	}
 
 	if (item_menu->get_item_count() > 0) {
@@ -1381,30 +1384,64 @@ void ScriptEditorDebugger::_error_tree_item_rmb_selected(const Vector2 &p_pos) {
 }
 
 void ScriptEditorDebugger::_item_menu_id_pressed(int p_option) {
-	TreeItem *ti = error_tree->get_selected();
-	while (ti->get_parent() != error_tree->get_root()) {
-		ti = ti->get_parent();
-	}
+	switch (p_option) {
+		case ACTION_COPY_ERROR: {
+			TreeItem *ti = error_tree->get_selected();
+			while (ti->get_parent() != error_tree->get_root()) {
+				ti = ti->get_parent();
+			}
 
-	String type;
+			String type;
 
-	if (ti->get_icon(0) == get_theme_icon("Warning", "EditorIcons")) {
-		type = "W ";
-	} else if (ti->get_icon(0) == get_theme_icon("Error", "EditorIcons")) {
-		type = "E ";
-	}
+			if (ti->get_icon(0) == get_theme_icon("Warning", "EditorIcons")) {
+				type = "W ";
+			} else if (ti->get_icon(0) == get_theme_icon("Error", "EditorIcons")) {
+				type = "E ";
+			}
 
-	String text = ti->get_text(0) + "   ";
-	int rpad_len = text.length();
+			String text = ti->get_text(0) + "   ";
+			int rpad_len = text.length();
 
-	text = type + text + ti->get_text(1) + "\n";
-	TreeItem *ci = ti->get_children();
-	while (ci) {
-		text += "  " + ci->get_text(0).rpad(rpad_len) + ci->get_text(1) + "\n";
-		ci = ci->get_next();
-	}
+			text = type + text + ti->get_text(1) + "\n";
+			TreeItem *ci = ti->get_children();
+			while (ci) {
+				text += "  " + ci->get_text(0).rpad(rpad_len) + ci->get_text(1) + "\n";
+				ci = ci->get_next();
+			}
 
-	DisplayServer::get_singleton()->clipboard_set(text);
+			DisplayServer::get_singleton()->clipboard_set(text);
+		} break;
+
+		case ACTION_OPEN_SOURCE: {
+			TreeItem *ti = error_tree->get_selected();
+			// Nothing to open if no error item is selected.
+			ERR_FAIL_COND_MSG(!ti, "No error is selected.");
+			while (ti->get_parent() != error_tree->get_root()) {
+				ti = ti->get_parent();
+			}
+
+			// We only need the first child here (C++ source stack trace).
+			// Bail out instead of dereferencing a null pointer if the error has no child items.
+			TreeItem *ci = ti->get_children();
+			ERR_FAIL_COND_MSG(!ci, "The selected error has no C++ source stack trace to open.");
+			// Parse back the `file:line @ method()` string.
+			const Vector<String> file_line_number = ci->get_text(1).split("@")[0].strip_edges().split(":");
+			ERR_FAIL_COND_MSG(file_line_number.size() < 2, "Incorrect C++ source stack trace file:line format (please report).");
+			const String file = file_line_number[0];
+			const int line_number = file_line_number[1].to_int();
+
+			// Construct a GitHub repository URL and open it in the user's default web browser.
+			if (String(VERSION_HASH).length() >= 1) {
+				// Git commit hash information available; use it for greater accuracy, including for development versions.
+				OS::get_singleton()->shell_open(vformat("https://github.com/godotengine/godot/blob/%s/%s#L%d",
+						VERSION_HASH, file, line_number));
+			} else {
+				// Git commit hash information unavailable; fall back to tagged releases.
+				OS::get_singleton()->shell_open(vformat("https://github.com/godotengine/godot/blob/%s-stable/%s#L%d",
+						VERSION_NUMBER, file, line_number));
+			}
+		} break;
+	}
 }
 
 void ScriptEditorDebugger::_tab_changed(int p_tab) {
diff --git a/editor/debugger/script_editor_debugger.h b/editor/debugger/script_editor_debugger.h
index e5fb3c35a9..a5731c9f9c 100644
--- a/editor/debugger/script_editor_debugger.h
+++ b/editor/debugger/script_editor_debugger.h
@@ -74,6 +74,11 @@ private:
 		PROFILER_SCRIPTS_SERVERS
 	};
 
+	enum Actions { // IDs of the error tree's context menu items, dispatched in _item_menu_id_pressed().
+		ACTION_COPY_ERROR, // "Copy Error": copies the selected error and its child rows to the clipboard.
+		ACTION_OPEN_SOURCE, // "Open C++ Source on GitHub": opens the error's source file and line in the web browser.
+	};
+
 	AcceptDialog *msgdialog;
 
 	LineEdit *clicked_ctrl;
diff --git a/editor/editor_about.cpp b/editor/editor_about.cpp
index 2ed937b6ff..d962658484 100644
--- a/editor/editor_about.cpp
+++ b/editor/editor_about.cpp
@@ -38,16 +38,15 @@
 #include "core/version_hash.gen.h"
 
 void EditorAbout::_theme_changed() {
-	Control *base = EditorNode::get_singleton()->get_gui_base();
-	Ref<Font> font = base->get_theme_font("source", "EditorFonts");
-	int font_size = base->get_theme_font_size("source_size", "EditorFonts");
+	const Ref<Font> font = get_theme_font("source", "EditorFonts"); // Theme items are looked up on this dialog itself: EditorNode is not initialized when EditorAbout is used from the project manager.
+	const int font_size = get_theme_font_size("source_size", "EditorFonts");
 	_tpl_text->add_theme_font_override("normal_font", font);
 	_tpl_text->add_theme_font_size_override("normal_font_size", font_size);
 	_tpl_text->add_theme_constant_override("line_separation", 6 * EDSCALE);
 	_license_text->add_theme_font_override("normal_font", font);
 	_license_text->add_theme_font_size_override("normal_font_size", font_size);
 	_license_text->add_theme_constant_override("line_separation", 6 * EDSCALE);
-	_logo->set_texture(base->get_theme_icon("Logo", "EditorIcons"));
+	_logo->set_texture(get_theme_icon("Logo", "EditorIcons"));
 }
 
 void EditorAbout::_notification(int p_what) {
diff --git a/editor/editor_about.h b/editor/editor_about.h
index efb7245e78..2823220a8a 100644
--- a/editor/editor_about.h
+++ b/editor/editor_about.h
@@ -44,6 +44,10 @@
 
 #include "editor_scale.h"
 
+/**
+ * NOTE: Do not assume the EditorNode singleton to be available in this class' methods.
+ * EditorAbout is also used from the project manager where EditorNode isn't initialized.
+ */
 class EditorAbout : public AcceptDialog {
 	GDCLASS(EditorAbout, AcceptDialog);
 
diff --git a/editor/editor_file_system.cpp b/editor/editor_file_system.cpp
index fb0dc57501..59d3b09678 100644
--- a/editor/editor_file_system.cpp
+++ b/editor/editor_file_system.cpp
@@ -1922,6 +1922,11 @@ void EditorFileSystem::reimport_file_with_custom_parameters(const String &p_file
 	_reimport_file(p_file, &p_custom_params, p_importer);
 }
 
+void EditorFileSystem::_reimport_thread(uint32_t p_index, ImportThreadData *p_import_data) { // Work item handed to import_threads.begin_work(): reimports one file of a batch; p_index is relative to reimport_from.
+	p_import_data->max_index = MAX(p_import_data->reimport_from + int(p_index), p_import_data->max_index); // Progress marker polled by reimport_files(). NOTE(review): unsynchronized read-modify-write; looks racy if work items run concurrently — confirm.
+	_reimport_file(p_import_data->reimport_files[p_import_data->reimport_from + p_index].path);
+}
+
 void EditorFileSystem::reimport_files(const Vector<String> &p_files) {
 	{
 		// Ensure that ProjectSettings::IMPORTED_FILES_PATH exists.
@@ -1939,7 +1944,8 @@ void EditorFileSystem::reimport_files(const Vector<String> &p_files) {
 	importing = true;
 	EditorProgress pr("reimport", TTR("(Re)Importing Assets"), p_files.size());
 
-	Vector<ImportFile> files;
+	Vector<ImportFile> reimport_files;
+
 	Set<String> groups_to_reimport;
 
 	for (int i = 0; i < p_files.size(); i++) {
@@ -1957,8 +1963,8 @@ void EditorFileSystem::reimport_files(const Vector<String> &p_files) {
 			//it's a regular file
 			ImportFile ifile;
 			ifile.path = p_files[i];
-			ifile.order = ResourceFormatImporter::get_singleton()->get_import_order(p_files[i]);
-			files.push_back(ifile);
+			ResourceFormatImporter::get_singleton()->get_import_order_threads_and_importer(p_files[i], ifile.order, ifile.threaded, ifile.importer);
+			reimport_files.push_back(ifile);
 		}
 
 		//group may have changed, so also update group reference
@@ -1969,11 +1975,51 @@ void EditorFileSystem::reimport_files(const Vector<String> &p_files) {
 		}
 	}
 
-	files.sort();
+	reimport_files.sort();
 
-	for (int i = 0; i < files.size(); i++) {
-		pr.step(files[i].path.get_file(), i);
-		_reimport_file(files[i].path);
+	bool use_threads = GLOBAL_GET("editor/import/use_multiple_threads");
+
+	// Consecutive threaded files sharing an importer form a batch; `from` is the first file not yet handled.
+	int from = 0;
+	for (int i = 0; i < reimport_files.size(); i++) {
+		if (use_threads && reimport_files[i].threaded) {
+			if (i + 1 == reimport_files.size() || reimport_files[i + 1].importer != reimport_files[from].importer) {
+				// Batch [batch_from, i] is complete. Advance `from` first so the error path below cannot leave it stale.
+				const int batch_from = from;
+				from = i + 1;
+				if (batch_from == i) {
+					// Single file, do not use threads.
+					pr.step(reimport_files[i].path.get_file(), i);
+					_reimport_file(reimport_files[i].path);
+				} else {
+					Ref<ResourceImporter> importer = ResourceFormatImporter::get_singleton()->get_importer_by_name(reimport_files[batch_from].importer);
+					ERR_CONTINUE(!importer.is_valid());
+					importer->import_threaded_begin();
+
+					ImportThreadData data;
+					data.max_index = batch_from;
+					data.reimport_from = batch_from;
+					data.reimport_files = reimport_files.ptr();
+
+					import_threads.begin_work(i - batch_from + 1, this, &EditorFileSystem::_reimport_thread, &data);
+					int current_index = batch_from - 1;
+					do {
+						if (current_index < data.max_index) {
+							current_index = data.max_index;
+							pr.step(reimport_files[current_index].path.get_file(), current_index);
+						}
+						OS::get_singleton()->delay_usec(1);
+					} while (!import_threads.is_done_dispatching());
+					import_threads.end_work();
+					importer->import_threaded_end();
+				}
+			}
+		} else {
+			pr.step(reimport_files[i].path.get_file(), i);
+			_reimport_file(reimport_files[i].path);
+			// The next file may start a threaded batch; that batch must not include this file.
+			from = i + 1;
+		}
 	}
 
 	//reimport groups
@@ -2111,7 +2157,7 @@ void EditorFileSystem::_update_extensions() {
 EditorFileSystem::EditorFileSystem() {
 	ResourceLoader::import = _resource_import;
 	reimport_on_missing_imported_files = GLOBAL_DEF("editor/import/reimport_missing_imported_files", true);
-
+	GLOBAL_DEF("editor/import/use_multiple_threads", true);
 	singleton = this;
 	filesystem = memnew(EditorFileSystemDirectory); //like, empty
 	filesystem->parent = nullptr;
@@ -2138,7 +2184,9 @@ EditorFileSystem::EditorFileSystem() {
 	first_scan = true;
 	scan_changes_pending = false;
 	revalidate_import_files = false;
+	import_threads.init();
 }
 
 EditorFileSystem::~EditorFileSystem() {
+	import_threads.finish();
 }
diff --git a/editor/editor_file_system.h b/editor/editor_file_system.h
index 6f4f058503..9c9076106c 100644
--- a/editor/editor_file_system.h
+++ b/editor/editor_file_system.h
@@ -36,7 +36,9 @@
 #include "core/os/thread_safe.h"
 #include "core/templates/safe_refcount.h"
 #include "core/templates/set.h"
+#include "core/templates/thread_work_pool.h"
 #include "scene/main/node.h"
+
 class FileAccess;
 
 struct EditorProgressBG;
@@ -214,9 +216,11 @@ class EditorFileSystem : public Node {
 
 	struct ImportFile {
 		String path;
+		String importer; // Importer name, filled in by get_import_order_threads_and_importer().
+		bool threaded = false; // Whether reimport_files() may import this file from the thread pool.
 		int order = 0;
 		bool operator<(const ImportFile &p_if) const {
-			return order < p_if.order;
+			return order == p_if.order ? (importer < p_if.importer) : (order < p_if.order); // Sort by order, then by importer, so files sharing an importer end up adjacent.
 		}
 	};
 
@@ -236,6 +240,16 @@ class EditorFileSystem : public Node {
 
 	Set<String> group_file_cache;
 
+	ThreadWorkPool import_threads;
+
+	struct ImportThreadData { // State shared between reimport_files() and the _reimport_thread() work items of one batch.
+		const ImportFile *reimport_files = nullptr; // Sorted array of all files being reimported.
+		int reimport_from = 0; // Index in `reimport_files` of the first file of the batch.
+		int max_index = 0; // Highest file index picked up by a work item so far; used to update the progress dialog.
+	};
+
+	void _reimport_thread(uint32_t p_index, ImportThreadData *p_import_data);
+
 protected:
 	void _notification(int p_what);
 	static void _bind_methods();
diff --git a/editor/editor_help.cpp b/editor/editor_help.cpp
index a747652a2f..6039f64b7c 100644
--- a/editor/editor_help.cpp
+++ b/editor/editor_help.cpp
@@ -1801,7 +1801,7 @@ void FindBar::popup_search() {
 
 	if (!search_text->get_text().is_empty()) {
 		search_text->select_all();
-		search_text->set_cursor_position(search_text->get_text().length());
+		search_text->set_caret_column(search_text->get_text().length());
 		if (grabbed_focus) {
 			_search();
 		}
diff --git a/editor/editor_themes.cpp b/editor/editor_themes.cpp
index 4c5c3af765..7cc9ebd63e 100644
--- a/editor/editor_themes.cpp
+++ b/editor/editor_themes.cpp
@@ -963,7 +963,7 @@ Ref<Theme> create_editor_theme(const Ref<Theme> p_theme) {
 	theme->set_color("read_only", "LineEdit", font_disabled_color);
 	theme->set_color("font_color", "LineEdit", font_color);
 	theme->set_color("font_selected_color", "LineEdit", mono_color);
-	theme->set_color("cursor_color", "LineEdit", font_color);
+	theme->set_color("caret_color", "LineEdit", font_color);
 	theme->set_color("selection_color", "LineEdit", selection_color);
 	theme->set_color("clear_button_color", "LineEdit", font_color);
 	theme->set_color("clear_button_color_pressed", "LineEdit", accent_color);
diff --git a/editor/import/resource_importer_scene.cpp b/editor/import/resource_importer_scene.cpp
index 9041b815ca..4bb56beaeb 100644
--- a/editor/import/resource_importer_scene.cpp
+++ b/editor/import/resource_importer_scene.cpp
@@ -1241,7 +1241,7 @@ void ResourceImporterScene::_generate_meshes(Node *p_node, const Dictionary &p_m
 			if (mesh.is_valid()) {
 				mesh_node->set_mesh(mesh);
 				for (int i = 0; i < mesh->get_surface_count(); i++) {
-					mesh_node->set_surface_material(i, src_mesh_node->get_surface_material(i));
+					mesh_node->set_surface_override_material(i, src_mesh_node->get_surface_material(i));
 				}
 			}
 		}
diff --git a/editor/import/resource_importer_scene.h b/editor/import/resource_importer_scene.h
index 6c6af57c4c..00039f2ac6 100644
--- a/editor/import/resource_importer_scene.h
+++ b/editor/import/resource_importer_scene.h
@@ -173,6 +173,8 @@ public:
 	virtual bool has_advanced_options() const override;
 	virtual void show_advanced_options(const String &p_path) override;
 
+	virtual bool can_import_threaded() const override { return false; }
+
 	ResourceImporterScene();
 };
 
diff --git a/editor/plugins/animation_blend_space_1d_editor.cpp b/editor/plugins/animation_blend_space_1d_editor.cpp
index 025fcaf818..f7c0ebcfaf 100644
--- a/editor/plugins/animation_blend_space_1d_editor.cpp
+++ b/editor/plugins/animation_blend_space_1d_editor.cpp
@@ -698,7 +698,7 @@ AnimationNodeBlendSpace1DEditor::AnimationNodeBlendSpace1DEditor() {
 		max_value->set_step(0.01);
 
 		label_value = memnew(LineEdit);
-		label_value->set_expand_to_text_length(true);
+		label_value->set_expand_to_text_length_enabled(true);
 
 		// now add
 
diff --git a/editor/plugins/animation_blend_space_2d_editor.cpp b/editor/plugins/animation_blend_space_2d_editor.cpp
index af9c391174..e719df53d5 100644
--- a/editor/plugins/animation_blend_space_2d_editor.cpp
+++ b/editor/plugins/animation_blend_space_2d_editor.cpp
@@ -942,7 +942,7 @@ AnimationNodeBlendSpace2DEditor::AnimationNodeBlendSpace2DEditor() {
 		left_vbox->add_spacer();
 		label_y = memnew(LineEdit);
 		left_vbox->add_child(label_y);
-		label_y->set_expand_to_text_length(true);
+		label_y->set_expand_to_text_length_enabled(true);
 		left_vbox->add_spacer();
 		min_y_value = memnew(SpinBox);
 		left_vbox->add_child(min_y_value);
@@ -978,7 +978,7 @@ AnimationNodeBlendSpace2DEditor::AnimationNodeBlendSpace2DEditor() {
 		bottom_vbox->add_spacer();
 		label_x = memnew(LineEdit);
 		bottom_vbox->add_child(label_x);
-		label_x->set_expand_to_text_length(true);
+		label_x->set_expand_to_text_length_enabled(true);
 		bottom_vbox->add_spacer();
 		max_x_value = memnew(SpinBox);
 		bottom_vbox->add_child(max_x_value);
diff --git a/editor/plugins/animation_blend_tree_editor_plugin.cpp b/editor/plugins/animation_blend_tree_editor_plugin.cpp
index fdbbe5184b..48fb507bb1 100644
--- a/editor/plugins/animation_blend_tree_editor_plugin.cpp
+++ b/editor/plugins/animation_blend_tree_editor_plugin.cpp
@@ -136,7 +136,7 @@ void AnimationNodeBlendTreeEditor::_update_graph() {
 		if (String(E->get()) != "output") {
 			LineEdit *name = memnew(LineEdit);
 			name->set_text(E->get());
-			name->set_expand_to_text_length(true);
+			name->set_expand_to_text_length_enabled(true);
 			node->add_child(name);
 			node->set_slot(0, false, 0, Color(), true, 0, get_theme_color("font_color", "Label"));
 			name->connect("text_entered", callable_mp(this, &AnimationNodeBlendTreeEditor::_node_renamed), varray(agnode), CONNECT_DEFERRED);
diff --git a/editor/plugins/canvas_item_editor_plugin.cpp b/editor/plugins/canvas_item_editor_plugin.cpp
index b678197037..fc3e15aa52 100644
--- a/editor/plugins/canvas_item_editor_plugin.cpp
+++ b/editor/plugins/canvas_item_editor_plugin.cpp
@@ -804,11 +804,15 @@ void CanvasItemEditor::_find_canvas_items_in_rect(const Rect2 &p_rect, Node *p_n
 
 bool CanvasItemEditor::_select_click_on_item(CanvasItem *item, Point2 p_click_pos, bool p_append) {
 	bool still_selected = true;
-	if (p_append) {
+	if (p_append && !editor_selection->get_selected_node_list().is_empty()) {
 		if (editor_selection->is_selected(item)) {
 			// Already in the selection, remove it from the selected nodes
 			editor_selection->remove_node(item);
 			still_selected = false;
+
+			if (editor_selection->get_selected_node_list().size() == 1) {
+				editor->push_item(editor_selection->get_selected_node_list()[0]);
+			}
 		} else {
 			// Add the item to the selection
 			editor_selection->add_node(item);
@@ -2589,6 +2593,9 @@ bool CanvasItemEditor::_gui_input_select(const Ref<InputEvent> &p_event) {
 				}
 
 				_find_canvas_items_in_rect(Rect2(bsfrom, bsto - bsfrom), scene, &selitems);
+				if (selitems.size() == 1 && editor_selection->get_selected_node_list().is_empty()) {
+					editor->push_item(selitems[0]);
+				}
 				for (List<CanvasItem *>::Element *E = selitems.front(); E; E = E->next()) {
 					editor_selection->add_node(E->get());
 				}
@@ -5378,9 +5385,6 @@ void CanvasItemEditor::_focus_selection(int p_op) {
 			rect = rect.merge(canvas_item_rect);
 		}
 	};
-	if (count == 0) {
-		return;
-	}
 
 	if (p_op == VIEW_CENTER_TO_SELECTION) {
 		center = rect.position + rect.size / 2;
diff --git a/editor/plugins/mesh_library_editor_plugin.cpp b/editor/plugins/mesh_library_editor_plugin.cpp
index f8932cd534..6f1f243444 100644
--- a/editor/plugins/mesh_library_editor_plugin.cpp
+++ b/editor/plugins/mesh_library_editor_plugin.cpp
@@ -93,7 +93,7 @@ void MeshLibraryEditor::_import_scene(Node *p_scene, Ref<MeshLibrary> p_library,
 
 		mesh = mesh->duplicate();
 		for (int j = 0; j < mesh->get_surface_count(); ++j) {
-			Ref<Material> mat = mi->get_surface_material(j);
+			Ref<Material> mat = mi->get_surface_override_material(j);
 
 			if (mat.is_valid()) {
 				mesh->surface_set_material(j, mat);
diff --git a/editor/plugins/node_3d_editor_plugin.cpp b/editor/plugins/node_3d_editor_plugin.cpp
index 3df092bc13..13c7814dac 100644
--- a/editor/plugins/node_3d_editor_plugin.cpp
+++ b/editor/plugins/node_3d_editor_plugin.cpp
@@ -3560,10 +3560,6 @@ void Node3DEditorViewport::reset() {
 }
 
 void Node3DEditorViewport::focus_selection() {
-	if (!get_selected_count()) {
-		return;
-	}
-
 	Vector3 center;
 	int count = 0;
 
diff --git a/editor/plugins/shader_editor_plugin.cpp b/editor/plugins/shader_editor_plugin.cpp
index 8f8a4b3054..ed3b746678 100644
--- a/editor/plugins/shader_editor_plugin.cpp
+++ b/editor/plugins/shader_editor_plugin.cpp
@@ -205,7 +205,7 @@ void ShaderTextEditor::_code_complete_script(const String &p_code, List<ScriptCo
 	ShaderLanguage sl;
 	String calltip;
 
-	sl.complete(p_code, ShaderTypes::get_singleton()->get_functions(RenderingServer::ShaderMode(shader->get_mode())), ShaderTypes::get_singleton()->get_modes(RenderingServer::ShaderMode(shader->get_mode())), ShaderTypes::get_singleton()->get_types(), _get_global_variable_type, r_options, calltip);
+	sl.complete(p_code, ShaderTypes::get_singleton()->get_functions(RenderingServer::ShaderMode(shader->get_mode())), ShaderTypes::get_singleton()->get_modes(RenderingServer::ShaderMode(shader->get_mode())), ShaderLanguage::VaryingFunctionNames(), ShaderTypes::get_singleton()->get_types(), _get_global_variable_type, r_options, calltip);
 
 	get_text_editor()->set_code_hint(calltip);
 }
@@ -219,7 +219,7 @@ void ShaderTextEditor::_validate_script() {
 
 	ShaderLanguage sl;
 
-	Error err = sl.compile(code, ShaderTypes::get_singleton()->get_functions(RenderingServer::ShaderMode(shader->get_mode())), ShaderTypes::get_singleton()->get_modes(RenderingServer::ShaderMode(shader->get_mode())), ShaderTypes::get_singleton()->get_types(), _get_global_variable_type);
+	Error err = sl.compile(code, ShaderTypes::get_singleton()->get_functions(RenderingServer::ShaderMode(shader->get_mode())), ShaderTypes::get_singleton()->get_modes(RenderingServer::ShaderMode(shader->get_mode())), ShaderLanguage::VaryingFunctionNames(), ShaderTypes::get_singleton()->get_types(), _get_global_variable_type);
 
 	if (err != OK) {
 		String error_text = "error(" + itos(sl.get_error_line()) + "): " + sl.get_error_text();
diff --git a/editor/plugins/visual_shader_editor_plugin.cpp b/editor/plugins/visual_shader_editor_plugin.cpp
index b2fa9c540e..dea85e8799 100644
--- a/editor/plugins/visual_shader_editor_plugin.cpp
+++ b/editor/plugins/visual_shader_editor_plugin.cpp
@@ -1133,16 +1133,24 @@ void VisualShaderEditor::_update_options_menu() {
 }
 
 void VisualShaderEditor::_set_mode(int p_which) {
-	if (p_which == VisualShader::MODE_PARTICLES) {
+	if (p_which == VisualShader::MODE_SKY) {
+		edit_type_standart->set_visible(false);
+		edit_type_particles->set_visible(false);
+		edit_type_sky->set_visible(true);
+		edit_type = edit_type_sky;
+		mode = MODE_FLAGS_SKY;
+	} else if (p_which == VisualShader::MODE_PARTICLES) {
 		edit_type_standart->set_visible(false);
 		edit_type_particles->set_visible(true);
+		edit_type_sky->set_visible(false);
 		edit_type = edit_type_particles;
-		particles_mode = true;
+		mode = MODE_FLAGS_PARTICLES;
 	} else {
 		edit_type_particles->set_visible(false);
 		edit_type_standart->set_visible(true);
+		edit_type_sky->set_visible(false);
 		edit_type = edit_type_standart;
-		particles_mode = false;
+		mode = MODE_FLAGS_SPATIAL_CANVASITEM;
 	}
 	visual_shader->set_shader_type(get_current_shader_type());
 }
@@ -1303,8 +1311,10 @@ void VisualShaderEditor::_update_graph() {
 
 VisualShader::Type VisualShaderEditor::get_current_shader_type() const {
 	VisualShader::Type type;
-	if (particles_mode) {
+	if (mode & MODE_FLAGS_PARTICLES) {
 		type = VisualShader::Type(edit_type->get_selected() + 3);
+	} else if (mode & MODE_FLAGS_SKY) {
+		type = VisualShader::Type(edit_type->get_selected() + 6);
 	} else {
 		type = VisualShader::Type(edit_type->get_selected());
 	}
@@ -3025,7 +3035,14 @@ void VisualShaderEditor::_paste_nodes(bool p_use_custom_position, const Vector2
 }
 
 void VisualShaderEditor::_mode_selected(int p_id) {
-	visual_shader->set_shader_type(particles_mode ? VisualShader::Type(p_id + 3) : VisualShader::Type(p_id));
+	int offset = 0; // Added to the selected item index to obtain the VisualShader::Type value.
+	if (mode & MODE_FLAGS_PARTICLES) {
+		offset = 3; // NOTE(review): presumably the index of the first particles type in VisualShader::Type — confirm.
+	} else if (mode & MODE_FLAGS_SKY) {
+		offset = 6; // NOTE(review): presumably the index of the first sky type in VisualShader::Type — confirm.
+	}
+
+	visual_shader->set_shader_type(VisualShader::Type(p_id + offset));
 	_update_options_menu();
 	_update_graph();
 }
@@ -3398,7 +3415,7 @@ void VisualShaderEditor::_update_preview() {
 
 	ShaderLanguage sl;
 
-	Error err = sl.compile(code, ShaderTypes::get_singleton()->get_functions(RenderingServer::ShaderMode(visual_shader->get_mode())), ShaderTypes::get_singleton()->get_modes(RenderingServer::ShaderMode(visual_shader->get_mode())), ShaderTypes::get_singleton()->get_types(), _get_global_variable_type);
+	Error err = sl.compile(code, ShaderTypes::get_singleton()->get_functions(RenderingServer::ShaderMode(visual_shader->get_mode())), ShaderTypes::get_singleton()->get_modes(RenderingServer::ShaderMode(visual_shader->get_mode())), ShaderLanguage::VaryingFunctionNames(), ShaderTypes::get_singleton()->get_types(), _get_global_variable_type);
 
 	for (int i = 0; i < preview_text->get_line_count(); i++) {
 		preview_text->set_line_as_marked(i, false);
@@ -3531,10 +3548,17 @@ VisualShaderEditor::VisualShaderEditor() {
 	edit_type_particles->select(0);
 	edit_type_particles->connect("item_selected", callable_mp(this, &VisualShaderEditor::_mode_selected));
 
+	edit_type_sky = memnew(OptionButton);
+	edit_type_sky->add_item(TTR("Sky"));
+	edit_type_sky->select(0);
+	edit_type_sky->connect("item_selected", callable_mp(this, &VisualShaderEditor::_mode_selected));
+
 	edit_type = edit_type_standart;
 
 	graph->get_zoom_hbox()->add_child(edit_type_particles);
 	graph->get_zoom_hbox()->move_child(edit_type_particles, 0);
+	graph->get_zoom_hbox()->add_child(edit_type_sky);
+	graph->get_zoom_hbox()->move_child(edit_type_sky, 0);
 	graph->get_zoom_hbox()->add_child(edit_type_standart);
 	graph->get_zoom_hbox()->move_child(edit_type_standart, 0);
 
@@ -3671,7 +3695,7 @@ VisualShaderEditor::VisualShaderEditor() {
 
 	comment_title_change_popup = memnew(PopupPanel);
 	comment_title_change_edit = memnew(LineEdit);
-	comment_title_change_edit->set_expand_to_text_length(true);
+	comment_title_change_edit->set_expand_to_text_length_enabled(true);
 	comment_title_change_edit->connect("text_changed", callable_mp(this, &VisualShaderEditor::_comment_title_text_changed));
 	comment_title_change_edit->connect("text_entered", callable_mp(this, &VisualShaderEditor::_comment_title_text_entered));
 	comment_title_change_popup->add_child(comment_title_change_edit);
@@ -3782,6 +3806,7 @@ VisualShaderEditor::VisualShaderEditor() {
 	const String input_param_for_vertex_and_fragment_shader_modes = TTR("'%s' input parameter for vertex and fragment shader modes.");
 	const String input_param_for_fragment_and_light_shader_modes = TTR("'%s' input parameter for fragment and light shader modes.");
 	const String input_param_for_fragment_shader_mode = TTR("'%s' input parameter for fragment shader mode.");
+	const String input_param_for_sky_shader_mode = TTR("'%s' input parameter for sky shader mode.");
 	const String input_param_for_light_shader_mode = TTR("'%s' input parameter for light shader mode.");
 	const String input_param_for_vertex_shader_mode = TTR("'%s' input parameter for vertex shader mode.");
 	const String input_param_for_emit_shader_mode = TTR("'%s' input parameter for emit shader mode.");
@@ -3911,35 +3936,35 @@ VisualShaderEditor::VisualShaderEditor() {
 
 	// SKY INPUTS
 
-	add_options.push_back(AddOption("AtCubeMapPass", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "at_cubemap_pass"), "at_cubemap_pass", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("AtHalfResPass", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "at_half_res_pass"), "at_half_res_pass", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("AtQuarterResPass", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "at_quarter_res_pass"), "at_quarter_res_pass", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("EyeDir", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "eyedir"), "eyedir", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("HalfResColor", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "half_res_color"), "half_res_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("HalfResAlpha", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "half_res_alpha"), "half_res_alpha", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light0Color", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light0_color"), "light0_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light0Direction", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light0_direction"), "light0_direction", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light0Enabled", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light0_enabled"), "light0_enabled", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light0Energy", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light0_energy"), "light0_energy", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light1Color", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light1_color"), "light1_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light1Direction", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light1_direction"), "light1_direction", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light1Enabled", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light1_enabled"), "light1_enabled", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light1Energy", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light1_energy"), "light1_energy", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light2Color", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light2_color"), "light2_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light2Direction", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light2_direction"), "light2_direction", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light2Enabled", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light2_enabled"), "light2_enabled", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light2Energy", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light2_energy"), "light2_energy", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light3Color", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light3_color"), "light3_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light3Direction", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light3_direction"), "light3_direction", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light3Enabled", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light3_enabled"), "light3_enabled", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Light3Energy", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "light3_energy"), "light3_energy", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Position", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "position"), "position", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("QuarterResColor", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "quarter_res_color"), "quarter_res_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("QuarterResAlpha", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "quarter_res_alpha"), "quarter_res_alpha", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Radiance", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "radiance"), "radiance", VisualShaderNode::PORT_TYPE_SAMPLER, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("ScreenUV", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "screen_uv"), "screen_uv", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("SkyCoords", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "sky_coords"), "sky_coords", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
-	add_options.push_back(AddOption("Time", "Input", "Fragment", "VisualShaderNodeInput", vformat(input_param_for_fragment_shader_mode, "time"), "time", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_FRAGMENT, Shader::MODE_SKY));
+	add_options.push_back(AddOption("AtCubeMapPass", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "at_cubemap_pass"), "at_cubemap_pass", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("AtHalfResPass", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "at_half_res_pass"), "at_half_res_pass", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("AtQuarterResPass", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "at_quarter_res_pass"), "at_quarter_res_pass", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("EyeDir", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "eyedir"), "eyedir", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("HalfResColor", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "half_res_color"), "half_res_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("HalfResAlpha", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "half_res_alpha"), "half_res_alpha", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light0Color", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light0_color"), "light0_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light0Direction", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light0_direction"), "light0_direction", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light0Enabled", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light0_enabled"), "light0_enabled", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light0Energy", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light0_energy"), "light0_energy", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light1Color", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light1_color"), "light1_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light1Direction", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light1_direction"), "light1_direction", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light1Enabled", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light1_enabled"), "light1_enabled", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light1Energy", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light1_energy"), "light1_energy", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light2Color", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light2_color"), "light2_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light2Direction", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light2_direction"), "light2_direction", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light2Enabled", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light2_enabled"), "light2_enabled", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light2Energy", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light2_energy"), "light2_energy", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light3Color", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light3_color"), "light3_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light3Direction", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light3_direction"), "light3_direction", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light3Enabled", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light3_enabled"), "light3_enabled", VisualShaderNode::PORT_TYPE_BOOLEAN, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Light3Energy", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "light3_energy"), "light3_energy", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Position", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "position"), "position", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("QuarterResColor", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "quarter_res_color"), "quarter_res_color", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("QuarterResAlpha", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "quarter_res_alpha"), "quarter_res_alpha", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Radiance", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "radiance"), "radiance", VisualShaderNode::PORT_TYPE_SAMPLER, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("ScreenUV", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "screen_uv"), "screen_uv", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("SkyCoords", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "sky_coords"), "sky_coords", VisualShaderNode::PORT_TYPE_VECTOR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
+	add_options.push_back(AddOption("Time", "Input", "Sky", "VisualShaderNodeInput", vformat(input_param_for_sky_shader_mode, "time"), "time", VisualShaderNode::PORT_TYPE_SCALAR, TYPE_FLAGS_SKY, Shader::MODE_SKY));
 
 	// SCALAR
 
diff --git a/editor/plugins/visual_shader_editor_plugin.h b/editor/plugins/visual_shader_editor_plugin.h
index 517dc6056f..6d57d38cab 100644
--- a/editor/plugins/visual_shader_editor_plugin.h
+++ b/editor/plugins/visual_shader_editor_plugin.h
@@ -141,6 +141,7 @@ class VisualShaderEditor : public VBoxContainer {
 	OptionButton *edit_type = nullptr;
 	OptionButton *edit_type_standart;
 	OptionButton *edit_type_particles;
+	OptionButton *edit_type_sky;
 
 	PanelContainer *error_panel;
 	Label *error_label;
@@ -169,7 +170,14 @@ class VisualShaderEditor : public VBoxContainer {
 
 	bool preview_first = true;
 	bool preview_showed = false;
-	bool particles_mode;
+
+	enum ShaderModeFlags { // Editor mode groups; each value is a distinct bit (1, 2, 4).
+		MODE_FLAGS_SPATIAL_CANVASITEM = 1, // Initial value of the `mode` member.
+		MODE_FLAGS_SKY = 2,
+		MODE_FLAGS_PARTICLES = 4
+	};
+
+	int mode = MODE_FLAGS_SPATIAL_CANVASITEM;
 
 	enum TypeFlags {
 		TYPE_FLAGS_VERTEX = 1,
@@ -183,6 +191,10 @@ class VisualShaderEditor : public VBoxContainer {
 		TYPE_FLAGS_END = 4
 	};
 
+	enum SkyTypeFlags { // Type flags for sky shaders, declared separately from TypeFlags.
+		TYPE_FLAGS_SKY = 1, // Same numeric value as TYPE_FLAGS_VERTEX; passed with the Shader::MODE_SKY input options.
+	};
+
 	enum ToolsMenuOptions {
 		EXPAND_ALL,
 		COLLAPSE_ALL
diff --git a/editor/project_manager.cpp b/editor/project_manager.cpp
index d3def86bd1..e51e8ee82e 100644
--- a/editor/project_manager.cpp
+++ b/editor/project_manager.cpp
@@ -296,7 +296,7 @@ private:
 		String sp = _test_path();
 		if (sp != "") {
 			// If the project name is empty or default, infer the project name from the selected folder name
-			if (project_name->get_text() == "" || project_name->get_text() == TTR("New Game Project")) {
+			if (project_name->get_text().strip_edges() == "" || project_name->get_text().strip_edges() == TTR("New Game Project")) {
 				sp = sp.replace("\\", "/");
 				int lidx = sp.rfind("/");
 
@@ -380,16 +380,17 @@ private:
 	}
 
 	void _create_folder() {
-		if (project_name->get_text() == "" || created_folder_path != "" || project_name->get_text().ends_with(".") || project_name->get_text().ends_with(" ")) {
-			set_message(TTR("Invalid Project Name."), MESSAGE_WARNING);
+		const String project_name_no_edges = project_name->get_text().strip_edges();
+		if (project_name_no_edges == "" || created_folder_path != "" || project_name_no_edges.ends_with(".")) {
+			set_message(TTR("Invalid project name."), MESSAGE_WARNING);
 			return;
 		}
 
 		DirAccess *d = DirAccess::create(DirAccess::ACCESS_FILESYSTEM);
 		if (d->change_dir(project_path->get_text()) == OK) {
-			if (!d->dir_exists(project_name->get_text())) {
-				if (d->make_dir(project_name->get_text()) == OK) {
-					d->change_dir(project_name->get_text());
+			if (!d->dir_exists(project_name_no_edges)) {
+				if (d->make_dir(project_name_no_edges) == OK) {
+					d->change_dir(project_name_no_edges);
 					String dir_str = d->get_current_dir();
 					project_path->set_text(dir_str);
 					_path_text_changed(dir_str);
@@ -415,7 +416,7 @@ private:
 
 		_test_path();
 
-		if (p_text == "") {
+		if (p_text.strip_edges() == "") {
 			set_message(TTR("It would be a good idea to name your project."), MESSAGE_ERROR);
 		}
 	}
@@ -442,7 +443,7 @@ private:
 				set_message(vformat(TTR("Couldn't load project.godot in project path (error %d). It may be missing or corrupted."), err), MESSAGE_ERROR);
 			} else {
 				ProjectSettings::CustomMap edited_settings;
-				edited_settings["application/config/name"] = project_name->get_text();
+				edited_settings["application/config/name"] = project_name->get_text().strip_edges();
 
 				if (current->save_custom(dir2.plus_file("project.godot"), edited_settings, Vector<String>(), true) != OK) {
 					set_message(TTR("Couldn't edit project.godot in project path."), MESSAGE_ERROR);
@@ -483,7 +484,7 @@ private:
 						initial_settings["rendering/textures/vram_compression/import_etc2"] = false;
 						initial_settings["rendering/textures/vram_compression/import_etc"] = true;
 					}
-					initial_settings["application/config/name"] = project_name->get_text();
+					initial_settings["application/config/name"] = project_name->get_text().strip_edges();
 					initial_settings["application/config/icon"] = "res://icon.png";
 					initial_settings["rendering/environment/defaults/default_environment"] = "res://default_env.tres";
 
@@ -1851,6 +1852,9 @@ void ProjectManager::_notification(int p_what) {
 		case NOTIFICATION_WM_CLOSE_REQUEST: {
 			_dim_window();
 		} break;
+		case NOTIFICATION_WM_ABOUT: {
+			_show_about();
+		} break;
 	}
 }
 
@@ -2254,6 +2258,10 @@ void ProjectManager::_erase_missing_projects() {
 	erase_missing_ask->popup_centered();
 }
 
+void ProjectManager::_show_about() { // Shared by the "About" button and NOTIFICATION_WM_ABOUT.
+	about->popup_centered(Size2(780, 500) * EDSCALE); // Centered popup, 780x500 multiplied by EDSCALE.
+}
+
 void ProjectManager::_language_selected(int p_id) {
 	String lang = language_btn->get_item_metadata(p_id);
 	EditorSettings::get_singleton()->set("interface/editor/editor_language", lang);
@@ -2443,12 +2451,7 @@ ProjectManager::ProjectManager() {
 	}
 
 	// TRANSLATORS: This refers to the application where users manage their Godot projects.
-	if (TS->is_locale_right_to_left(TranslationServer::get_singleton()->get_tool_locale())) {
-		// For RTL languages, embed translated part of the title (using control characters) to ensure correct order.
-		DisplayServer::get_singleton()->window_set_title(VERSION_NAME + String(" - ") + String::chr(0x202B) + TTR("Project Manager") + String::chr(0x202C) + String::chr(0x200E) + " - " + String::chr(0xA9) + " 2007-2021 Juan Linietsky, Ariel Manzur & Godot Contributors");
-	} else {
-		DisplayServer::get_singleton()->window_set_title(VERSION_NAME + String(" - ") + TTR("Project Manager") + " - " + String::chr(0xA9) + " 2007-2021 Juan Linietsky, Ariel Manzur & Godot Contributors");
-	}
+	DisplayServer::get_singleton()->window_set_title(VERSION_NAME + String(" - ") + TTR("Project Manager"));
 
 	FileDialog::set_default_show_hidden_files(EditorSettings::get_singleton()->get("filesystem/file_dialog/show_hidden_files"));
 
@@ -2582,6 +2585,13 @@ ProjectManager::ProjectManager() {
 		erase_missing_btn->set_text(TTR("Remove Missing"));
 		erase_missing_btn->connect("pressed", callable_mp(this, &ProjectManager::_erase_missing_projects));
 		tree_vb->add_child(erase_missing_btn);
+
+		tree_vb->add_spacer();
+
+		about_btn = memnew(Button);
+		about_btn->set_text(TTR("About"));
+		about_btn->connect("pressed", callable_mp(this, &ProjectManager::_show_about));
+		tree_vb->add_child(about_btn);
 	}
 
 	{
@@ -2715,6 +2725,9 @@ ProjectManager::ProjectManager() {
 		open_templates->get_ok_button()->set_text(TTR("Open Asset Library"));
 		open_templates->connect("confirmed", callable_mp(this, &ProjectManager::_open_asset_library));
 		add_child(open_templates);
+
+		about = memnew(EditorAbout);
+		add_child(about);
 	}
 
 	_load_recent_projects();
diff --git a/editor/project_manager.h b/editor/project_manager.h
index d13315c022..a66b7c4ab6 100644
--- a/editor/project_manager.h
+++ b/editor/project_manager.h
@@ -31,6 +31,7 @@
 #ifndef PROJECT_MANAGER_H
 #define PROJECT_MANAGER_H
 
+#include "editor/editor_about.h"
 #include "editor/plugins/asset_library_editor_plugin.h"
 #include "scene/gui/dialogs.h"
 #include "scene/gui/file_dialog.h"
@@ -62,6 +63,7 @@ class ProjectManager : public Control {
 	Button *rename_btn;
 	Button *erase_btn;
 	Button *erase_missing_btn;
+	Button *about_btn;
 
 	EditorAssetLibrary *asset_library;
 
@@ -78,6 +80,7 @@ class ProjectManager : public Control {
 	ConfirmationDialog *multi_scan_ask;
 	ConfirmationDialog *ask_update_settings;
 	ConfirmationDialog *open_templates;
+	EditorAbout *about;
 
 	HBoxContainer *settings_hb;
 
@@ -100,6 +103,7 @@ class ProjectManager : public Control {
 	void _erase_missing_projects();
 	void _erase_project_confirm();
 	void _erase_missing_projects_confirm();
+	void _show_about();
 	void _update_project_buttons();
 	void _language_selected(int p_id);
 	void _restart_confirm();
diff --git a/editor/project_settings_editor.cpp b/editor/project_settings_editor.cpp
index de7996eaa2..faec3355ac 100644
--- a/editor/project_settings_editor.cpp
+++ b/editor/project_settings_editor.cpp
@@ -102,10 +102,9 @@ void ProjectSettingsEditor::_add_setting() {
 	String setting = _get_setting_name();
 
 	// Initialize the property with the default value for the given type.
-	// The type list starts at 1 (as we exclude Nil), so add 1 to the selected value.
 	Callable::CallError ce;
 	Variant value;
-	Variant::construct(Variant::Type(type->get_selected() + 1), value, nullptr, 0, ce);
+	Variant::construct(Variant::Type(type->get_selected_id()), value, nullptr, 0, ce);
 
 	undo_redo->create_action(TTR("Add Project Setting"));
 	undo_redo->add_do_property(ps, setting, value);
@@ -584,7 +583,7 @@ ProjectSettingsEditor::ProjectSettingsEditor(EditorData *p_data) {
 			// There's no point in adding Nil types, and Object types
 			// can't be serialized correctly in the project settings.
 			if (i != Variant::NIL && i != Variant::OBJECT) {
-				type->add_item(Variant::get_type_name(Variant::Type(i)));
+				type->add_item(Variant::get_type_name(Variant::Type(i)), i);
 			}
 		}
 
diff --git a/editor/rename_dialog.cpp b/editor/rename_dialog.cpp
index b51524b299..0f15d4b119 100644
--- a/editor/rename_dialog.cpp
+++ b/editor/rename_dialog.cpp
@@ -632,7 +632,7 @@ void RenameDialog::_insert_text(String text) {
 
 	if (_is_main_field(focus_owner)) {
 		focus_owner->selection_delete();
-		focus_owner->append_at_cursor(text);
+		focus_owner->insert_text_at_caret(text);
 		_update_preview();
 	}
 }
diff --git a/editor/scene_tree_dock.cpp b/editor/scene_tree_dock.cpp
index 5e6ebc22a3..a6d1a118b8 100644
--- a/editor/scene_tree_dock.cpp
+++ b/editor/scene_tree_dock.cpp
@@ -140,7 +140,11 @@ void SceneTreeDock::instance_scenes(const Vector<String> &p_files, Node *p_paren
 		parent = scene_tree->get_selected();
 	}
 
-	if (!parent || !edited_scene) {
+	if (!parent) {
+		parent = edited_scene;
+	}
+
+	if (!parent) {
 		if (p_files.size() == 1) {
 			accept->set_text(TTR("No parent to instance a child at."));
 		} else {
diff --git a/editor/script_create_dialog.cpp b/editor/script_create_dialog.cpp
index b707f6c353..f3addd8904 100644
--- a/editor/script_create_dialog.cpp
+++ b/editor/script_create_dialog.cpp
@@ -87,8 +87,8 @@ void ScriptCreateDialog::_path_hbox_sorted() {
 
 		// First set cursor to the end of line to scroll LineEdit view
 		// to the right and then set the actual cursor position.
-		file_path->set_cursor_position(file_path->get_text().length());
-		file_path->set_cursor_position(filename_start_pos);
+		file_path->set_caret_column(file_path->get_text().length());
+		file_path->set_caret_column(filename_start_pos);
 
 		file_path->grab_focus();
 	}
@@ -238,6 +238,14 @@ String ScriptCreateDialog::_validate_path(const String &p_path, bool p_file_must
 	return "";
 }
 
+String ScriptCreateDialog::_get_class_name() const {
+	if (!has_named_classes) {
+		// Without named classes, the name is derived from the script file's base name.
+		return ProjectSettings::get_singleton()->localize_path(file_path->get_text()).get_file().get_basename();
+	}
+	return class_name->get_text();
+}
+
 void ScriptCreateDialog::_class_name_changed(const String &p_name) {
 	if (_validate_class(class_name->get_text())) {
 		is_class_name_valid = true;
@@ -287,13 +295,7 @@ void ScriptCreateDialog::ok_pressed() {
 }
 
 void ScriptCreateDialog::_create_new() {
-	String cname_param;
-
-	if (has_named_classes) {
-		cname_param = class_name->get_text();
-	} else {
-		cname_param = ProjectSettings::get_singleton()->localize_path(file_path->get_text()).get_file().get_basename();
-	}
+	String cname_param = _get_class_name();
 
 	Ref<Script> scr;
 	if (script_template != "") {
@@ -555,7 +557,7 @@ void ScriptCreateDialog::_file_selected(const String &p_file) {
 		String filename = p.get_file().get_basename();
 		int select_start = p.rfind(filename);
 		file_path->select(select_start, select_start + filename.length());
-		file_path->set_cursor_position(select_start + filename.length());
+		file_path->set_caret_column(select_start + filename.length());
 		file_path->grab_focus();
 	}
 }
@@ -687,6 +689,10 @@ void ScriptCreateDialog::_update_dialog() {
 
 	builtin_warning_label->set_visible(is_built_in);
 
+	// Check if the script name is the same as the parent class.
+	// This warning isn't relevant if the script is built-in.
+	script_name_warning_label->set_visible(!is_built_in && _get_class_name() == parent_name->get_text());
+
 	if (is_built_in) {
 		get_ok_button()->set_text(TTR("Create"));
 		parent_name->set_editable(true);
@@ -768,6 +774,14 @@ ScriptCreateDialog::ScriptCreateDialog() {
 	builtin_warning_label->set_autowrap(true);
 	builtin_warning_label->hide();
 
+	script_name_warning_label = memnew(Label);
+	script_name_warning_label->set_text(
+			TTR("Warning: Having the script name be the same as a built-in type is usually not desired."));
+	vb->add_child(script_name_warning_label);
+	script_name_warning_label->add_theme_color_override("font_color", Color(1, 0.85, 0.4));
+	script_name_warning_label->set_autowrap(true);
+	script_name_warning_label->hide();
+
 	status_panel = memnew(PanelContainer);
 	status_panel->set_h_size_flags(Control::SIZE_FILL);
 	status_panel->add_child(vb);
diff --git a/editor/script_create_dialog.h b/editor/script_create_dialog.h
index e898b6f927..d6417b9d33 100644
--- a/editor/script_create_dialog.h
+++ b/editor/script_create_dialog.h
@@ -50,6 +50,7 @@ class ScriptCreateDialog : public ConfirmationDialog {
 	Label *error_label;
 	Label *path_error_label;
 	Label *builtin_warning_label;
+	Label *script_name_warning_label;
 	PanelContainer *status_panel;
 	LineEdit *parent_name;
 	Button *parent_browse_button;
@@ -110,6 +111,7 @@ class ScriptCreateDialog : public ConfirmationDialog {
 	bool _validate_parent(const String &p_string);
 	bool _validate_class(const String &p_string);
 	String _validate_path(const String &p_path, bool p_file_must_exist);
+	String _get_class_name() const;
 	void _class_name_changed(const String &p_name);
 	void _parent_name_changed(const String &p_parent);
 	void _template_changed(int p_template = 0);
diff --git a/main/main.cpp b/main/main.cpp
index 4103fad17c..bf7b88bdc9 100644
--- a/main/main.cpp
+++ b/main/main.cpp
@@ -375,8 +375,8 @@ void Main::print_help(const char *p_binary) {
 #ifdef TESTS_ENABLED
 	OS::get_singleton()->print("  --test [--help]                              Run unit tests. Use --test --help for more information.\n");
 #endif
-	OS::get_singleton()->print("\n");
 #endif
+	OS::get_singleton()->print("\n");
 }
 
 #ifdef TESTS_ENABLED
@@ -390,6 +390,8 @@ Error Main::test_setup() {
 	register_core_types();
 	register_core_driver_types();
 
+	packed_data = memnew(PackedData);
+
 	globals = memnew(ProjectSettings);
 
 	GLOBAL_DEF("debug/settings/crash_handler/message",
@@ -459,6 +461,9 @@ void Main::test_cleanup() {
 	if (globals) {
 		memdelete(globals);
 	}
+	if (packed_data) {
+		memdelete(packed_data);
+	}
 	if (engine) {
 		memdelete(engine);
 	}
diff --git a/misc/dist/html/editor.html b/misc/dist/html/editor.html
index 4785f54973..347c22adf8 100644
--- a/misc/dist/html/editor.html
+++ b/misc/dist/html/editor.html
@@ -58,6 +58,29 @@
 			filter: brightness(82.5%);
 		}
 
+		.welcome-modal { /* Dimmed backdrop covering the whole window behind the welcome dialog. */
+			display: none; /* Hidden by default; the page script switches it to "block". */
+			position: fixed;
+			z-index: 1;
+			left: 0;
+			top: 0;
+			width: 100%;
+			height: 100%;
+			overflow: auto;
+			background-color: hsla(0, 0%, 0%, 0.5);
+		}
+
+		.welcome-modal-content { /* The dialog box itself, centered horizontally inside the backdrop. */
+			max-width: 38rem;
+			margin: 4rem auto 0;
+			padding: 1rem 1rem 2rem;
+			border-radius: 0.5rem;
+			box-shadow: 0 0.25rem 0.25rem hsla(0, 0%, 0%, 0.5);
+			background-color: #333b4f;
+			color: white;
+			line-height: 1.5;
+		}
+
 		#tabs-buttons {
 			/* Match the default background color of the editor window for a seamless appearance. */
 			background-color: #202531;
@@ -206,6 +229,36 @@
 	</style>
 </head>
 <body>
+	<div
+		id="welcome-modal"
+		class="welcome-modal"
+		role="dialog"
+		aria-labelledby="welcome-modal-title"
+		aria-describedby="welcome-modal-description"
+		onclick="if (event.target === this) closeWelcomeModal(false)"
+	>
+		<div class="welcome-modal-content">
+			<h2 id="welcome-modal-title">Important - Please read before continuing</h2>
+			<div id="welcome-modal-description">
+				<p>
+					The Godot Web Editor has some limitations compared to the native version.
+					Its main focus is education and experimentation;
+					<strong>it is not recommended for production</strong>.
+				</p>
+				<p>
+					Refer to the
+					<a
+						href="https://docs.godotengine.org/en/latest/tutorials/editor/using_the_web_editor.html"
+						target="_blank"
+						rel="noopener"
+					>Web editor documentation</a> for usage instructions and limitations.
+				</p>
+			</div>
+			<button id="welcome-modal-dismiss" class="btn" type="button" onclick="closeWelcomeModal(true)" style="margin-top: 1rem">
+				OK, don't show again
+			</button>
+		</div>
+	</div>
 	<div id="tabs-buttons">
 		<button id="btn-tab-loader" class="btn tab-btn" onclick="showTab('loader')">Loader</button>
 		<button id="btn-tab-editor" class="btn tab-btn" disabled="disabled" onclick="showTab('editor')">Editor</button>
@@ -274,7 +327,19 @@
 			if ("serviceWorker" in navigator) {
 				navigator.serviceWorker.register("service.worker.js");
 			}
+
+			if (localStorage.getItem("welcomeModalDismissed") !== 'true') {
+				document.getElementById("welcome-modal").style.display = "block";
+				document.getElementById("welcome-modal-dismiss").focus();
+			}
 		});
+
+		function closeWelcomeModal(dontShowAgain) {
+			// Hide the dialog; the dismissal is only persisted when explicitly requested.
+			document.getElementById("welcome-modal").style.display = "none";
+			if (!dontShowAgain) { return; }
+			localStorage.setItem("welcomeModalDismissed", 'true');
+		}
 	</script>
 	<script src="godot.tools.js"></script>
 	<script>//<![CDATA[
diff --git a/misc/dist/osx_template.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json b/misc/dist/osx_template.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
index 6bf2edb02d..c4f8f71d0e 100644
--- a/misc/dist/osx_template.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
+++ b/misc/dist/osx_template.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
@@ -2,6 +2,6 @@
     "file_format_version" : "1.0.0",
     "ICD": {
         "library_path": "../../../Frameworks/libMoltenVK.dylib",
-        "api_version" : "1.0.0"
+        "api_version" : "1.1.0"
     }
 }
diff --git a/misc/dist/osx_tools.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json b/misc/dist/osx_tools.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
index 6bf2edb02d..c4f8f71d0e 100644
--- a/misc/dist/osx_tools.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
+++ b/misc/dist/osx_tools.app/Contents/Resources/vulkan/icd.d/MoltenVK_icd.json
@@ -2,6 +2,6 @@
     "file_format_version" : "1.0.0",
     "ICD": {
         "library_path": "../../../Frameworks/libMoltenVK.dylib",
-        "api_version" : "1.0.0"
+        "api_version" : "1.1.0"
     }
 }
diff --git a/misc/scripts/check_ci_log.py b/misc/scripts/check_ci_log.py
new file mode 100755
index 0000000000..f2cdf95c7b
--- /dev/null
+++ b/misc/scripts/check_ci_log.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Scan a CI run log for sanitizer errors, crashes, leaks and failed assertions; exit 1 if any is found."""
+
+import sys
+
+if len(sys.argv) < 2:
+    print("ERROR: You must run program with file name as argument.")
+    sys.exit(1)
+
+fname = sys.argv[1]
+
+with open(fname.strip(), "r") as fileread:
+    file_contents = fileread.read()
+
+# "ERROR: AddressSanitizer:" means that an invalid memory read or write happened.
+# This is a critical bug, so we need to fix it as fast as possible.
+
+if "ERROR: AddressSanitizer:" in file_contents:
+    print("FATAL ERROR: An incorrectly used memory was found.")
+    sys.exit(1)
+
+# It is also possible that the program crashed, with or without a backtrace.
+
+if (
+    "Program crashed with signal" in file_contents
+    or "Dumping the backtrace" in file_contents
+    or "Segmentation fault (core dumped)" in file_contents
+):
+    print("FATAL ERROR: Godot has been crashed.")
+    sys.exit(1)
+
+# Finding memory leaks in Godot is quite difficult, because we also need to
+# take into account leaks in external libraries. They are usually provided
+# without debugging symbols, so their leak reports usually have only 2-3 lines.
+# Searching for a fifth stack frame - "#4 0x" - should therefore correctly
+# detect the vast majority of memory leaks in Godot itself.
+
+if "ERROR: LeakSanitizer:" in file_contents and "#4 0x" in file_contents:
+    print("ERROR: Memory leak was found")
+    sys.exit(1)
+
+# It may happen that Godot detects leaking nodes/resources and removes them, so
+# this possibility should also be handled as a potential error, even if
+# LeakSanitizer doesn't report anything.
+
+if "ObjectDB instances leaked at exit" in file_contents:
+    print("ERROR: Memory leak was found")
+    sys.exit(1)
+
+# The test project may contain several assert functions that check whether the
+# project is executed with the right parameters etc.; a failed assertion does
+# not normally stop the execution of the project, so it is detected here.
+
+if "Assertion failed" in file_contents:
+    print("ERROR: Assertion failed in project, check execution log for more info")
+    sys.exit(1)
+
+# For now Godot leaks a lot of rendering stuff, so for now we only print a
+# warning about it. This needs to be re-enabled after fixing these memory leaks.
+
+if "were leaked" in file_contents or "were never freed" in file_contents:
+    print("WARNING: Memory leak was found")
+
+sys.exit(0)
diff --git a/modules/bullet/shape_bullet.cpp b/modules/bullet/shape_bullet.cpp
index 471b154813..40e785d699 100644
--- a/modules/bullet/shape_bullet.cpp
+++ b/modules/bullet/shape_bullet.cpp
@@ -142,7 +142,7 @@ btScaledBvhTriangleMeshShape *ShapeBullet::create_shape_concave(btBvhTriangleMes
 	}
 }
 
-btHeightfieldTerrainShape *ShapeBullet::create_shape_height_field(Vector<real_t> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height) {
+btHeightfieldTerrainShape *ShapeBullet::create_shape_height_field(Vector<float> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height) {
 	const btScalar ignoredHeightScale(1);
 	const int YAxis = 1; // 0=X, 1=Y, 2=Z
 	const bool flipQuadEdges = false;
@@ -480,17 +480,10 @@ void HeightMapShapeBullet::set_data(const Variant &p_data) {
 	ERR_FAIL_COND_MSG(l_width < 2, "Map width must be at least 2.");
 	ERR_FAIL_COND_MSG(l_depth < 2, "Map depth must be at least 2.");
 
-	// TODO This code will need adjustments if real_t is set to `double`,
-	// because that precision is unnecessary for a heightmap and Bullet doesn't support it...
-
-	Vector<real_t> l_heights;
+	Vector<float> l_heights;
 	Variant l_heights_v = d["heights"];
 
-#ifdef REAL_T_IS_DOUBLE
-	if (l_heights_v.get_type() == Variant::PACKED_FLOAT64_ARRAY) {
-#else
 	if (l_heights_v.get_type() == Variant::PACKED_FLOAT32_ARRAY) {
-#endif
 		// Ready-to-use heights can be passed
 
 		l_heights = l_heights_v;
@@ -511,9 +504,9 @@ void HeightMapShapeBullet::set_data(const Variant &p_data) {
 
 		l_heights.resize(l_image->get_width() * l_image->get_height());
 
-		real_t *w = l_heights.ptrw();
+		float *w = l_heights.ptrw();
 		const uint8_t *r = im_data.ptr();
-		real_t *rp = (real_t *)r;
+		float *rp = (float *)r;
 		// At this point, `rp` could be used directly for Bullet, but I don't know how safe it would be.
 
 		for (int i = 0; i < l_heights.size(); ++i) {
@@ -521,11 +514,7 @@ void HeightMapShapeBullet::set_data(const Variant &p_data) {
 		}
 
 	} else {
-#ifdef REAL_T_IS_DOUBLE
-		ERR_FAIL_MSG("Expected PackedFloat64Array or float Image.");
-#else
 		ERR_FAIL_MSG("Expected PackedFloat32Array or float Image.");
-#endif
 	}
 
 	ERR_FAIL_COND(l_width <= 0);
@@ -534,11 +523,11 @@ void HeightMapShapeBullet::set_data(const Variant &p_data) {
 
 	// Compute min and max heights if not specified.
 	if (!d.has("min_height") && !d.has("max_height")) {
-		const real_t *r = l_heights.ptr();
+		const float *r = l_heights.ptr();
 		int heights_size = l_heights.size();
 
 		for (int i = 0; i < heights_size; ++i) {
-			real_t h = r[i];
+			float h = r[i];
 
 			if (h < l_min_height) {
 				l_min_height = h;
@@ -559,7 +548,7 @@ PhysicsServer3D::ShapeType HeightMapShapeBullet::get_type() const {
 	return PhysicsServer3D::SHAPE_HEIGHTMAP;
 }
 
-void HeightMapShapeBullet::setup(Vector<real_t> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height) {
+void HeightMapShapeBullet::setup(Vector<float> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height) {
 	// TODO cell size must be tweaked using localScaling, which is a shared property for all Bullet shapes
 
 	// If this array is resized outside of here, it should be preserved due to CoW
diff --git a/modules/bullet/shape_bullet.h b/modules/bullet/shape_bullet.h
index bfd95747eb..5080d13d99 100644
--- a/modules/bullet/shape_bullet.h
+++ b/modules/bullet/shape_bullet.h
@@ -89,7 +89,7 @@ public:
 	/// IMPORTANT: Remember to delete the shape interface by calling: delete my_shape->getMeshInterface();
 	static class btConvexPointCloudShape *create_shape_convex(btAlignedObjectArray<btVector3> &p_vertices, const btVector3 &p_local_scaling = btVector3(1, 1, 1));
 	static class btScaledBvhTriangleMeshShape *create_shape_concave(btBvhTriangleMeshShape *p_mesh_shape, const btVector3 &p_local_scaling = btVector3(1, 1, 1));
-	static class btHeightfieldTerrainShape *create_shape_height_field(Vector<real_t> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height);
+	static class btHeightfieldTerrainShape *create_shape_height_field(Vector<float> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height);
 	static class btRayShape *create_shape_ray(real_t p_length, bool p_slips_on_slope);
 };
 
@@ -212,7 +212,7 @@ private:
 
 class HeightMapShapeBullet : public ShapeBullet {
 public:
-	Vector<real_t> heights;
+	Vector<float> heights;
 	int width = 0;
 	int depth = 0;
 	real_t min_height = 0.0;
@@ -226,7 +226,7 @@ public:
 	virtual btCollisionShape *create_bt_shape(const btVector3 &p_implicit_scale, real_t p_extra_edge = 0);
 
 private:
-	void setup(Vector<real_t> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height);
+	void setup(Vector<float> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height);
 };
 
 class RayShapeBullet : public ShapeBullet {
diff --git a/modules/etc/SCsub b/modules/etc/SCsub
deleted file mode 100644
index 9b46f17916..0000000000
--- a/modules/etc/SCsub
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python
-
-Import("env")
-Import("env_modules")
-
-env_etc = env_modules.Clone()
-
-# Thirdparty source files
-
-thirdparty_obj = []
-
-# Not unbundled so far since not widespread as shared library
-thirdparty_dir = "#thirdparty/etc2comp/"
-thirdparty_sources = [
-    "EtcBlock4x4.cpp",
-    "EtcBlock4x4Encoding.cpp",
-    "EtcBlock4x4Encoding_ETC1.cpp",
-    "EtcBlock4x4Encoding_R11.cpp",
-    "EtcBlock4x4Encoding_RG11.cpp",
-    "EtcBlock4x4Encoding_RGB8A1.cpp",
-    "EtcBlock4x4Encoding_RGB8.cpp",
-    "EtcBlock4x4Encoding_RGBA8.cpp",
-    "Etc.cpp",
-    "EtcDifferentialTrys.cpp",
-    "EtcFilter.cpp",
-    "EtcImage.cpp",
-    "EtcIndividualTrys.cpp",
-    "EtcMath.cpp",
-    "EtcSortedBlockList.cpp",
-]
-thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
-
-env_etc.Prepend(CPPPATH=[thirdparty_dir])
-
-env_thirdparty = env_etc.Clone()
-env_thirdparty.disable_warnings()
-env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
-env.modules_sources += thirdparty_obj
-
-# Godot source files
-
-module_obj = []
-
-env_etc.add_source_files(module_obj, "*.cpp")
-env.modules_sources += module_obj
-
-# Needed to force rebuilding the module files when the thirdparty library is updated.
-env.Depends(module_obj, thirdparty_obj)
diff --git a/modules/etc/image_compress_etc.cpp b/modules/etc/image_compress_etc.cpp
deleted file mode 100644
index 41cbbe3f54..0000000000
--- a/modules/etc/image_compress_etc.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-/*************************************************************************/
-/*  image_compress_etc.cpp                                               */
-/*************************************************************************/
-/*                       This file is part of:                           */
-/*                           GODOT ENGINE                                */
-/*                      https://godotengine.org                          */
-/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
-/*                                                                       */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the       */
-/* "Software"), to deal in the Software without restriction, including   */
-/* without limitation the rights to use, copy, modify, merge, publish,   */
-/* distribute, sublicense, and/or sell copies of the Software, and to    */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions:                                             */
-/*                                                                       */
-/* The above copyright notice and this permission notice shall be        */
-/* included in all copies or substantial portions of the Software.       */
-/*                                                                       */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
-/*************************************************************************/
-
-#include "image_compress_etc.h"
-
-#include "core/io/image.h"
-#include "core/os/copymem.h"
-#include "core/os/os.h"
-#include "core/string/print_string.h"
-
-#include <Etc.h>
-#include <EtcFilter.h>
-
-static Image::Format _get_etc2_mode(Image::UsedChannels format) {
-	switch (format) {
-		case Image::USED_CHANNELS_R:
-			return Image::FORMAT_ETC2_R11;
-
-		case Image::USED_CHANNELS_RG:
-			return Image::FORMAT_ETC2_RG11;
-
-		case Image::USED_CHANNELS_RGB:
-			return Image::FORMAT_ETC2_RGB8;
-
-		case Image::USED_CHANNELS_RGBA:
-			return Image::FORMAT_ETC2_RGBA8;
-
-		// TODO: would be nice if we could use FORMAT_ETC2_RGB8A1 for FORMAT_RGBA5551
-		default:
-			// TODO: Kept for compatibility, but should be investigated whether it's correct or if it should error out
-			return Image::FORMAT_ETC2_RGBA8;
-	}
-}
-
-static Etc::Image::Format _image_format_to_etc2comp_format(Image::Format format) {
-	switch (format) {
-		case Image::FORMAT_ETC:
-			return Etc::Image::Format::ETC1;
-
-		case Image::FORMAT_ETC2_R11:
-			return Etc::Image::Format::R11;
-
-		case Image::FORMAT_ETC2_R11S:
-			return Etc::Image::Format::SIGNED_R11;
-
-		case Image::FORMAT_ETC2_RG11:
-			return Etc::Image::Format::RG11;
-
-		case Image::FORMAT_ETC2_RG11S:
-			return Etc::Image::Format::SIGNED_RG11;
-
-		case Image::FORMAT_ETC2_RGB8:
-			return Etc::Image::Format::RGB8;
-
-		case Image::FORMAT_ETC2_RGBA8:
-			return Etc::Image::Format::RGBA8;
-
-		case Image::FORMAT_ETC2_RGB8A1:
-			return Etc::Image::Format::RGB8A1;
-
-		default:
-			ERR_FAIL_V(Etc::Image::Format::UNKNOWN);
-	}
-}
-
-static void _compress_etc(Image *p_img, float p_lossy_quality, bool force_etc1_format, Image::UsedChannels p_channels) {
-	Image::Format img_format = p_img->get_format();
-
-	if (img_format >= Image::FORMAT_DXT1) {
-		return; //do not compress, already compressed
-	}
-
-	if (img_format > Image::FORMAT_RGBA8) {
-		// TODO: we should be able to handle FORMAT_RGBA4444 and FORMAT_RGBA5551 eventually
-		return;
-	}
-
-	// FIXME: Commented out during Vulkan rebase.
-	/*
-	if (force_etc1_format) {
-		// If VRAM compression is using ETC, but image has alpha, convert to RGBA4444 or LA8
-		// This saves space while maintaining the alpha channel
-		if (detected_channels == Image::USED_CHANNELS_RGBA) {
-			if (p_img->has_mipmaps()) {
-				// Image doesn't support mipmaps with RGBA4444 textures
-				p_img->clear_mipmaps();
-			}
-			p_img->convert(Image::FORMAT_RGBA4444);
-			return;
-		} else if (detected_channels == Image::USE_CHANNELS_LA) {
-			p_img->convert(Image::FORMAT_LA8);
-			return;
-		}
-	}
-	*/
-
-	uint32_t imgw = p_img->get_width(), imgh = p_img->get_height();
-
-	Image::Format etc_format = force_etc1_format ? Image::FORMAT_ETC : _get_etc2_mode(p_channels);
-
-	Ref<Image> img = p_img->duplicate();
-
-	if (img->get_format() != Image::FORMAT_RGBA8) {
-		img->convert(Image::FORMAT_RGBA8); //still uses RGBA to convert
-	}
-
-	if (img->has_mipmaps()) {
-		if (next_power_of_2(imgw) != imgw || next_power_of_2(imgh) != imgh) {
-			img->resize_to_po2();
-			imgw = img->get_width();
-			imgh = img->get_height();
-		}
-	} else {
-		if (imgw % 4 != 0 || imgh % 4 != 0) {
-			if (imgw % 4) {
-				imgw += 4 - imgw % 4;
-			}
-			if (imgh % 4) {
-				imgh += 4 - imgh % 4;
-			}
-
-			img->resize(imgw, imgh);
-		}
-	}
-
-	const uint8_t *r = img->get_data().ptr();
-	ERR_FAIL_COND(!r);
-
-	unsigned int target_size = Image::get_image_data_size(imgw, imgh, etc_format, p_img->has_mipmaps());
-	int mmc = 1 + (p_img->has_mipmaps() ? Image::get_image_required_mipmaps(imgw, imgh, etc_format) : 0);
-
-	Vector<uint8_t> dst_data;
-	dst_data.resize(target_size);
-
-	uint8_t *w = dst_data.ptrw();
-
-	// prepare parameters to be passed to etc2comp
-	int num_cpus = OS::get_singleton()->get_processor_count();
-	int encoding_time = 0;
-	float effort = 0.0; //default, reasonable time
-
-	if (p_lossy_quality > 0.95) {
-		effort = 80;
-	} else if (p_lossy_quality > 0.85) {
-		effort = 60;
-	} else if (p_lossy_quality > 0.75) {
-		effort = 40;
-	}
-
-	Etc::ErrorMetric error_metric = Etc::ErrorMetric::RGBX; // NOTE: we can experiment with other error metrics
-	Etc::Image::Format etc2comp_etc_format = _image_format_to_etc2comp_format(etc_format);
-
-	int wofs = 0;
-
-	print_verbose("ETC: Begin encoding, format: " + Image::get_format_name(etc_format));
-	uint64_t t = OS::get_singleton()->get_ticks_msec();
-	for (int i = 0; i < mmc; i++) {
-		// convert source image to internal etc2comp format (which is equivalent to Image::FORMAT_RGBAF)
-		// NOTE: We can alternatively add a case to Image::convert to handle Image::FORMAT_RGBAF conversion.
-		int mipmap_ofs = 0, mipmap_size = 0, mipmap_w = 0, mipmap_h = 0;
-		img->get_mipmap_offset_size_and_dimensions(i, mipmap_ofs, mipmap_size, mipmap_w, mipmap_h);
-		const uint8_t *src = &r[mipmap_ofs];
-
-		Etc::ColorFloatRGBA *src_rgba_f = new Etc::ColorFloatRGBA[mipmap_w * mipmap_h];
-		for (int j = 0; j < mipmap_w * mipmap_h; j++) {
-			int si = j * 4; // RGBA8
-			src_rgba_f[j] = Etc::ColorFloatRGBA::ConvertFromRGBA8(src[si], src[si + 1], src[si + 2], src[si + 3]);
-		}
-
-		unsigned char *etc_data = nullptr;
-		unsigned int etc_data_len = 0;
-		unsigned int extended_width = 0, extended_height = 0;
-		Etc::Encode((float *)src_rgba_f, mipmap_w, mipmap_h, etc2comp_etc_format, error_metric, effort, num_cpus, num_cpus, &etc_data, &etc_data_len, &extended_width, &extended_height, &encoding_time);
-
-		CRASH_COND(wofs + etc_data_len > target_size);
-		memcpy(&w[wofs], etc_data, etc_data_len);
-		wofs += etc_data_len;
-
-		delete[] etc_data;
-		delete[] src_rgba_f;
-	}
-
-	print_verbose("ETC: Time encoding: " + rtos(OS::get_singleton()->get_ticks_msec() - t));
-
-	p_img->create(imgw, imgh, p_img->has_mipmaps(), etc_format, dst_data);
-}
-
-static void _compress_etc1(Image *p_img, float p_lossy_quality) {
-	_compress_etc(p_img, p_lossy_quality, true, Image::USED_CHANNELS_RGB);
-}
-
-static void _compress_etc2(Image *p_img, float p_lossy_quality, Image::UsedChannels p_channels) {
-	_compress_etc(p_img, p_lossy_quality, false, p_channels);
-}
-
-void _register_etc_compress_func() {
-	Image::_image_compress_etc1_func = _compress_etc1;
-	Image::_image_compress_etc2_func = _compress_etc2;
-}
diff --git a/modules/etc/texture_loader_pkm.cpp b/modules/etc/texture_loader_pkm.cpp
deleted file mode 100644
index 95db9315d5..0000000000
--- a/modules/etc/texture_loader_pkm.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*************************************************************************/
-/*  texture_loader_pkm.cpp                                               */
-/*************************************************************************/
-/*                       This file is part of:                           */
-/*                           GODOT ENGINE                                */
-/*                      https://godotengine.org                          */
-/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
-/*                                                                       */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the       */
-/* "Software"), to deal in the Software without restriction, including   */
-/* without limitation the rights to use, copy, modify, merge, publish,   */
-/* distribute, sublicense, and/or sell copies of the Software, and to    */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions:                                             */
-/*                                                                       */
-/* The above copyright notice and this permission notice shall be        */
-/* included in all copies or substantial portions of the Software.       */
-/*                                                                       */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
-/*************************************************************************/
-
-#include "texture_loader_pkm.h"
-
-#include "core/os/file_access.h"
-#include <string.h>
-
-struct ETC1Header {
-	char tag[6]; // "PKM 10"
-	uint16_t format = 0; // Format == number of mips (== zero)
-	uint16_t texWidth = 0; // Texture dimensions, multiple of 4 (big-endian)
-	uint16_t texHeight = 0;
-	uint16_t origWidth = 0; // Original dimensions (big-endian)
-	uint16_t origHeight = 0;
-};
-
-RES ResourceFormatPKM::load(const String &p_path, const String &p_original_path, Error *r_error, bool p_use_sub_threads, float *r_progress, CacheMode p_cache_mode) {
-	if (r_error) {
-		*r_error = ERR_CANT_OPEN;
-	}
-
-	Error err;
-	FileAccess *f = FileAccess::open(p_path, FileAccess::READ, &err);
-	if (!f) {
-		return RES();
-	}
-
-	FileAccessRef fref(f);
-	if (r_error) {
-		*r_error = ERR_FILE_CORRUPT;
-	}
-
-	ERR_FAIL_COND_V_MSG(err != OK, RES(), "Unable to open PKM texture file '" + p_path + "'.");
-
-	// big endian
-	f->set_endian_swap(true);
-
-	ETC1Header h;
-	f->get_buffer((uint8_t *)&h.tag, sizeof(h.tag));
-	ERR_FAIL_COND_V_MSG(strncmp(h.tag, "PKM 10", sizeof(h.tag)), RES(), "Invalid or unsupported PKM texture file '" + p_path + "'.");
-
-	h.format = f->get_16();
-	h.texWidth = f->get_16();
-	h.texHeight = f->get_16();
-	h.origWidth = f->get_16();
-	h.origHeight = f->get_16();
-
-	Vector<uint8_t> src_data;
-
-	uint32_t size = h.texWidth * h.texHeight / 2;
-	src_data.resize(size);
-	uint8_t *wb = src_data.ptrw();
-	f->get_buffer(wb, size);
-
-	int mipmaps = h.format;
-	int width = h.origWidth;
-	int height = h.origHeight;
-
-	Ref<Image> img = memnew(Image(width, height, mipmaps, Image::FORMAT_ETC, src_data));
-
-	Ref<ImageTexture> texture = memnew(ImageTexture);
-	texture->create_from_image(img);
-
-	if (r_error) {
-		*r_error = OK;
-	}
-
-	f->close();
-	memdelete(f);
-	return texture;
-}
-
-void ResourceFormatPKM::get_recognized_extensions(List<String> *p_extensions) const {
-	p_extensions->push_back("pkm");
-}
-
-bool ResourceFormatPKM::handles_type(const String &p_type) const {
-	return ClassDB::is_parent_class(p_type, "Texture2D");
-}
-
-String ResourceFormatPKM::get_resource_type(const String &p_path) const {
-	if (p_path.get_extension().to_lower() == "pkm") {
-		return "ImageTexture";
-	}
-	return "";
-}
diff --git a/modules/etcpak/SCsub b/modules/etcpak/SCsub
new file mode 100644
index 0000000000..2d3b69be75
--- /dev/null
+++ b/modules/etcpak/SCsub
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+Import("env")
+Import("env_modules")
+
+env_etcpak = env_modules.Clone()
+
+# Thirdparty source files (etcpak), built in a cloned environment with warnings disabled.
+
+thirdparty_obj = []
+
+thirdparty_dir = "#thirdparty/etcpak/"
+thirdparty_sources = [
+    "Dither.cpp",
+    "ProcessDxtc.cpp",
+    "ProcessRGB.cpp",
+    "Tables.cpp",
+]
+thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
+
+env_etcpak.Prepend(CPPPATH=[thirdparty_dir])
+
+env_thirdparty = env_etcpak.Clone()
+env_thirdparty.disable_warnings()
+env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
+env.modules_sources += thirdparty_obj
+
+# Godot source files: every *.cpp file of this module, built with the etcpak include path.
+
+module_obj = []
+
+env_etcpak.add_source_files(module_obj, "*.cpp")
+env.modules_sources += module_obj
+
+# Needed to force rebuilding the module files when the thirdparty library is updated.
+env.Depends(module_obj, thirdparty_obj)
diff --git a/modules/etc/config.py b/modules/etcpak/config.py
index 53b8f2f2e3..53b8f2f2e3 100644
--- a/modules/etc/config.py
+++ b/modules/etcpak/config.py
diff --git a/modules/etcpak/image_compress_etcpak.cpp b/modules/etcpak/image_compress_etcpak.cpp
new file mode 100644
index 0000000000..abc3c26188
--- /dev/null
+++ b/modules/etcpak/image_compress_etcpak.cpp
@@ -0,0 +1,184 @@
+/*************************************************************************/
+/*  image_compress_etcpak.cpp                                            */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "image_compress_etcpak.h"
+
+#include "core/os/os.h"
+#include "core/string/print_string.h"
+
+#include "thirdparty/etcpak/ProcessDxtc.hpp"
+#include "thirdparty/etcpak/ProcessRGB.hpp"
+
+EtcpakType _determine_etc_type(Image::UsedChannels p_channels) {
+	// Pick the etcpak ETC encoder variant for the channels the image uses.
+	switch (p_channels) {
+		case Image::USED_CHANNELS_L:
+			return EtcpakType::ETCPAK_TYPE_ETC1;
+		case Image::USED_CHANNELS_R:
+		case Image::USED_CHANNELS_RGB:
+			return EtcpakType::ETCPAK_TYPE_ETC2;
+		case Image::USED_CHANNELS_RG:
+			return EtcpakType::ETCPAK_TYPE_ETC2_RA_AS_RG;
+		case Image::USED_CHANNELS_LA:
+		case Image::USED_CHANNELS_RGBA:
+			return EtcpakType::ETCPAK_TYPE_ETC2_ALPHA;
+		default:
+			// Any other channel usage falls back to the ETC2 variant with alpha.
+			return EtcpakType::ETCPAK_TYPE_ETC2_ALPHA;
+	}
+}
+
+EtcpakType _determine_dxt_type(Image::UsedChannels p_channels) {
+	// Pick the etcpak DXT encoder variant for the channels the image uses.
+	// Only luminance-only images use DXT1; RG uses the RA-as-RG variant of DXT5.
+	switch (p_channels) {
+		case Image::USED_CHANNELS_L:
+			return EtcpakType::ETCPAK_TYPE_DXT1;
+		case Image::USED_CHANNELS_RG:
+			return EtcpakType::ETCPAK_TYPE_DXT5_RA_AS_RG;
+		case Image::USED_CHANNELS_LA:
+		case Image::USED_CHANNELS_R:
+		case Image::USED_CHANNELS_RGB:
+		case Image::USED_CHANNELS_RGBA:
+			return EtcpakType::ETCPAK_TYPE_DXT5;
+		default:
+			// Any other channel usage is also encoded as DXT5.
+			return EtcpakType::ETCPAK_TYPE_DXT5;
+	}
+}
+
+void _compress_etc1(Image *r_img, float p_lossy_quality) {
+	_compress_etcpak(EtcpakType::ETCPAK_TYPE_ETC1, r_img, p_lossy_quality); // ETC1 is requested explicitly; no channel-based selection.
+}
+
+void _compress_etc2(Image *r_img, float p_lossy_quality, Image::UsedChannels p_channels) {
+	// The ETC variant is selected from the channels the image uses.
+	_compress_etcpak(_determine_etc_type(p_channels), r_img, p_lossy_quality);
+}
+
+void _compress_bc(Image *r_img, float p_lossy_quality, Image::UsedChannels p_channels) {
+	// The DXT variant is selected from the channels the image uses.
+	_compress_etcpak(_determine_dxt_type(p_channels), r_img, p_lossy_quality);
+}
+
+void _compress_etcpak(EtcpakType p_compresstype, Image *r_img, float p_lossy_quality) {
+	// Compresses `r_img` in place with the etcpak encoder selected by `p_compresstype`.
+	// TODO: See how to handle lossy quality (`p_lossy_quality` is currently unused).
+	uint64_t start_time = OS::get_singleton()->get_ticks_msec();
+
+	Image::Format img_format = r_img->get_format();
+	if (img_format >= Image::FORMAT_DXT1) {
+		return; // Do not compress, already compressed.
+	}
+	if (img_format > Image::FORMAT_RGBA8) {
+		// TODO: we should be able to handle FORMAT_RGBA4444 and FORMAT_RGBA5551 eventually
+		return;
+	}
+
+	// The encoders below read the source as 32-bit RGBA8 pixels, so convert to RGBA8 first.
+	if (img_format != Image::FORMAT_RGBA8) {
+		r_img->convert(Image::FORMAT_RGBA8);
+	}
+
+	// Determine output format based on Etcpak type.
+	Image::Format target_format = Image::FORMAT_RGBA8;
+	if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC1) {
+		target_format = Image::FORMAT_ETC;
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2) {
+		target_format = Image::FORMAT_ETC2_RGB8;
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2_RA_AS_RG) {
+		target_format = Image::FORMAT_ETC2_RA_AS_RG;
+		r_img->convert_rg_to_ra_rgba8();
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2_ALPHA) {
+		target_format = Image::FORMAT_ETC2_RGBA8;
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT1) {
+		target_format = Image::FORMAT_DXT1;
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT5_RA_AS_RG) {
+		target_format = Image::FORMAT_DXT5_RA_AS_RG;
+		r_img->convert_rg_to_ra_rgba8();
+	} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT5) {
+		target_format = Image::FORMAT_DXT5;
+	} else {
+		ERR_FAIL_MSG("Invalid or unsupported Etcpak compression format.");
+	}
+
+	// Compress image data and (if required) mipmaps.
+
+	const bool mipmaps = r_img->has_mipmaps();
+	const int width = r_img->get_width();
+	const int height = r_img->get_height();
+	const uint8_t *src_read = r_img->get_data().ptr();
+
+	print_verbose(vformat("ETCPAK: Encoding image size %dx%d to format %s.", width, height, Image::get_format_name(target_format)));
+
+	int dest_size = Image::get_image_data_size(width, height, target_format, mipmaps);
+	Vector<uint8_t> dest_data;
+	dest_data.resize(dest_size);
+	uint8_t *dest_write = dest_data.ptrw();
+
+	int mip_count = mipmaps ? Image::get_image_required_mipmaps(width, height, target_format) : 0;
+
+	for (int i = 0; i < mip_count + 1; i++) {
+		// Get write mip metrics for target image.
+		int mip_w, mip_h;
+		int mip_ofs = Image::get_image_mipmap_offset_and_dimensions(width, height, target_format, i, mip_w, mip_h);
+		// Ensure that mip offset is a multiple of 8 (etcpak expects uint64_t pointer).
+		ERR_FAIL_COND(mip_ofs % 8 != 0);
+		uint64_t *dest_mip_write = (uint64_t *)&dest_write[mip_ofs];
+
+		// Block size. Align stride to multiple of 4 (RGBA8).
+		mip_w = (mip_w + 3) & ~3;
+		mip_h = (mip_h + 3) & ~3;
+		const uint32_t blocks = mip_w * mip_h / 16;
+		// NOTE(review): the source mip is presumably read with this padded stride; confirm for sizes that are not multiples of 4.
+		// Get mip data from source image for reading.
+		int src_mip_ofs = r_img->get_mipmap_offset(i);
+		const uint32_t *src_mip_read = (const uint32_t *)&src_read[src_mip_ofs];
+		// RA_AS_RG images carry data in alpha (see convert_rg_to_ra_rgba8() above), so they need an alpha-preserving encoder.
+		if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC1) {
+			CompressEtc1RgbDither(src_mip_read, dest_mip_write, blocks, mip_w);
+		} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2) {
+			CompressEtc2Rgb(src_mip_read, dest_mip_write, blocks, mip_w);
+		} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2_ALPHA || p_compresstype == EtcpakType::ETCPAK_TYPE_ETC2_RA_AS_RG) {
+			CompressEtc2Rgba(src_mip_read, dest_mip_write, blocks, mip_w);
+		} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT1) {
+			CompressDxt1Dither(src_mip_read, dest_mip_write, blocks, mip_w);
+		} else if (p_compresstype == EtcpakType::ETCPAK_TYPE_DXT5 || p_compresstype == EtcpakType::ETCPAK_TYPE_DXT5_RA_AS_RG) {
+			CompressDxt5(src_mip_read, dest_mip_write, blocks, mip_w);
+		} else {
+			ERR_FAIL_MSG("Invalid or unsupported Etcpak compression format.");
+		}
+	}
+
+	// Replace original image with compressed one.
+	r_img->create(width, height, mipmaps, target_format, dest_data);
+
+	print_verbose(vformat("ETCPAK encode took %s ms.", rtos(OS::get_singleton()->get_ticks_msec() - start_time)));
+}
diff --git a/modules/etc/register_types.cpp b/modules/etcpak/image_compress_etcpak.h
index b165bccb3e..ccf157fada 100644
--- a/modules/etc/register_types.cpp
+++ b/modules/etcpak/image_compress_etcpak.h
@@ -1,5 +1,5 @@
 /*************************************************************************/
-/*  register_types.cpp                                                   */
+/*  image_compress_etcpak.h                                              */
 /*************************************************************************/
 /*                       This file is part of:                           */
 /*                           GODOT ENGINE                                */
@@ -28,21 +28,25 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#include "register_types.h"
+#ifndef IMAGE_COMPRESS_ETCPAK_H
+#define IMAGE_COMPRESS_ETCPAK_H
 
-#include "image_compress_etc.h"
-#include "texture_loader_pkm.h"
+#include "core/io/image.h"
 
-static Ref<ResourceFormatPKM> resource_loader_pkm;
+enum class EtcpakType { // Encoder selector passed to _compress_etcpak().
+	ETCPAK_TYPE_ETC1, // Encoded with CompressEtc1RgbDither.
+	ETCPAK_TYPE_ETC2, // Encoded with CompressEtc2Rgb.
+	ETCPAK_TYPE_ETC2_ALPHA, // Encoded with CompressEtc2Rgba.
+	ETCPAK_TYPE_ETC2_RA_AS_RG, // Same encoder as ETCPAK_TYPE_ETC2 (CompressEtc2Rgb).
+	ETCPAK_TYPE_DXT1, // Encoded with CompressDxt1Dither.
+	ETCPAK_TYPE_DXT5, // Encoded with CompressDxt5.
+	ETCPAK_TYPE_DXT5_RA_AS_RG, // Same encoder as ETCPAK_TYPE_DXT5 (CompressDxt5).
+};
 
-void register_etc_types() {
-	resource_loader_pkm.instance();
-	ResourceLoader::add_resource_format_loader(resource_loader_pkm);
+void _compress_etc1(Image *r_img, float p_lossy_quality); // Registered as Image::_image_compress_etc1_func.
+void _compress_etc2(Image *r_img, float p_lossy_quality, Image::UsedChannels p_channels); // Registered as Image::_image_compress_etc2_func.
+void _compress_bc(Image *r_img, float p_lossy_quality, Image::UsedChannels p_channels); // Registered as Image::_image_compress_bc_func.
 
-	_register_etc_compress_func();
-}
+void _compress_etcpak(EtcpakType p_compresstype, Image *r_img, float p_lossy_quality); // Replaces r_img with its compressed version, using the encoder chosen by p_compresstype.
 
-void unregister_etc_types() {
-	ResourceLoader::remove_resource_format_loader(resource_loader_pkm);
-	resource_loader_pkm.unref();
-}
+#endif // IMAGE_COMPRESS_ETCPAK_H
diff --git a/modules/squish/image_compress_squish.h b/modules/etcpak/register_types.cpp
index 301d30fcf1..d57d2f747a 100644
--- a/modules/squish/image_compress_squish.h
+++ b/modules/etcpak/register_types.cpp
@@ -1,5 +1,5 @@
 /*************************************************************************/
-/*  image_compress_squish.h                                              */
+/*  register_types.cpp                                                   */
 /*************************************************************************/
 /*                       This file is part of:                           */
 /*                           GODOT ENGINE                                */
@@ -28,12 +28,15 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#ifndef IMAGE_COMPRESS_SQUISH_H
-#define IMAGE_COMPRESS_SQUISH_H
+#include "register_types.h"
 
-#include "core/io/image.h"
+#include "image_compress_etcpak.h"
 
-void image_compress_squish(Image *p_image, float p_lossy_quality, Image::UsedChannels p_channels);
-void image_decompress_squish(Image *p_image);
+void register_etcpak_types() { // Installs the etcpak functions as Image's ETC1/ETC2/BC compression callbacks.
+	Image::_image_compress_etc1_func = _compress_etc1;
+	Image::_image_compress_etc2_func = _compress_etc2;
+	Image::_image_compress_bc_func = _compress_bc;
+}
 
-#endif // IMAGE_COMPRESS_SQUISH_H
+void unregister_etcpak_types() { // Empty: the callbacks set in register_etcpak_types() are not reset here.
+}
diff --git a/modules/etc/register_types.h b/modules/etcpak/register_types.h
index e8cbb635ae..a9e10a4aae 100644
--- a/modules/etc/register_types.h
+++ b/modules/etcpak/register_types.h
@@ -28,10 +28,10 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#ifndef ETC_REGISTER_TYPES_H
-#define ETC_REGISTER_TYPES_H
+#ifndef ETCPAK_REGISTER_TYPES_H
+#define ETCPAK_REGISTER_TYPES_H
 
-void register_etc_types();
-void unregister_etc_types();
+void register_etcpak_types();
+void unregister_etcpak_types();
 
-#endif // ETC_REGISTER_TYPES_H
+#endif // ETCPAK_REGISTER_TYPES_H
diff --git a/modules/fbx/data/fbx_material.cpp b/modules/fbx/data/fbx_material.cpp
index 5995097b2f..d54ac86e9f 100644
--- a/modules/fbx/data/fbx_material.cpp
+++ b/modules/fbx/data/fbx_material.cpp
@@ -277,7 +277,7 @@ Ref<StandardMaterial3D> FBXMaterial::import_material(ImportState &state) {
 	}
 
 	/// ALL below is related to properties
-	for (FBXDocParser::LazyPropertyMap::value_type iter : material->Props()->GetLazyProperties()) {
+	for (FBXDocParser::LazyPropertyMap::value_type iter : material->GetLazyProperties()) {
 		const std::string name = iter.first;
 
 		if (name.empty()) {
@@ -317,7 +317,7 @@ Ref<StandardMaterial3D> FBXMaterial::import_material(ImportState &state) {
 
 		ERR_CONTINUE_MSG(desc == PROPERTY_DESC_NOT_FOUND, "The FBX material parameter: `" + String(name.c_str()) + "` was not recognized. Please open an issue so we can add the support to it.");
 
-		const FBXDocParser::PropertyTable *tbl = material->Props();
+		const FBXDocParser::PropertyTable *tbl = material;
 		FBXDocParser::PropertyPtr prop = tbl->Get(name);
 
 		ERR_CONTINUE_MSG(prop == nullptr, "This file may be corrupted because is not possible to extract the material parameter: " + String(name.c_str()));
diff --git a/modules/fbx/data/fbx_mesh_data.cpp b/modules/fbx/data/fbx_mesh_data.cpp
index b088dd8640..304d1598f6 100644
--- a/modules/fbx/data/fbx_mesh_data.cpp
+++ b/modules/fbx/data/fbx_mesh_data.cpp
@@ -101,20 +101,6 @@ HashMap<int, Vector2> collect_uv(const Vector<VertexData<Vector2>> *p_data, Hash
 	return collection;
 }
 
-typedef int Vertex;
-typedef int SurfaceId;
-typedef int PolygonId;
-typedef int DataIndex;
-
-struct SurfaceData {
-	Ref<SurfaceTool> surface_tool;
-	OrderedHashMap<Vertex, int> lookup_table; // proposed fix is to replace lookup_table[vertex_id] to give the position of the vertices_map[int] index.
-	LocalVector<Vertex> vertices_map; // this must be ordered the same as insertion <-- slow to do find() operation.
-	Ref<Material> material;
-	HashMap<PolygonId, Vector<DataIndex>> surface_polygon_vertex;
-	Array morphs;
-};
-
 EditorSceneImporterMeshNode3D *FBXMeshData::create_fbx_mesh(const ImportState &state, const FBXDocParser::MeshGeometry *p_mesh_geometry, const FBXDocParser::Model *model, bool use_compression) {
 	mesh_geometry = p_mesh_geometry;
 	// todo: make this just use a uint64_t FBX ID this is a copy of our original materials unfortunately.
@@ -307,11 +293,9 @@ EditorSceneImporterMeshNode3D *FBXMeshData::create_fbx_mesh(const ImportState &s
 		// Triangulate the various polygons and add the indices.
 		for (const PolygonId *polygon_id = surface->surface_polygon_vertex.next(nullptr); polygon_id != nullptr; polygon_id = surface->surface_polygon_vertex.next(polygon_id)) {
 			const Vector<DataIndex> *indices = surface->surface_polygon_vertex.getptr(*polygon_id);
-
 			triangulate_polygon(
-					surface->surface_tool,
+					surface,
 					*indices,
-					surface->vertices_map,
 					vertices);
 		}
 	}
@@ -336,7 +320,7 @@ EditorSceneImporterMeshNode3D *FBXMeshData::create_fbx_mesh(const ImportState &s
 			morph_st->begin(Mesh::PRIMITIVE_TRIANGLES);
 
 			for (unsigned int vi = 0; vi < surface->vertices_map.size(); vi += 1) {
-				const Vertex vertex = surface->vertices_map[vi];
+				const Vertex &vertex = surface->vertices_map[vi];
 				add_vertex(
 						state,
 						morph_st,
@@ -398,6 +382,9 @@ EditorSceneImporterMeshNode3D *FBXMeshData::create_fbx_mesh(const ImportState &s
 
 	EditorSceneImporterMeshNode3D *godot_mesh = memnew(EditorSceneImporterMeshNode3D);
 	godot_mesh->set_mesh(mesh);
+	const String name = ImportUtils::FBXNodeToName(model->Name());
+	godot_mesh->set_name(name); // Name the node after the FBX model.
+	mesh->set_name("mesh3d-" + name); // The mesh gets the same name with a "mesh3d-" prefix.
 	return godot_mesh;
 }
 
@@ -816,8 +803,10 @@ void FBXMeshData::add_vertex(
 	p_surface_tool->add_vertex((p_vertices_position[p_vertex] + p_morph_value) * p_scale);
 }
 
-void FBXMeshData::triangulate_polygon(Ref<SurfaceTool> st, Vector<int> p_polygon_vertex, const Vector<Vertex> p_surface_vertex_map, const std::vector<Vector3> &p_vertices) const {
+void FBXMeshData::triangulate_polygon(SurfaceData *surface, const Vector<int> &p_polygon_vertex, const std::vector<Vector3> &p_vertices) const {
+	Ref<SurfaceTool> st(surface->surface_tool);
 	const int polygon_vertex_count = p_polygon_vertex.size();
+	// surface->vertices_map replaces the former p_surface_vertex_map parameter.
 	if (polygon_vertex_count == 1) {
 		// point to triangle
 		st->add_index(p_polygon_vertex[0]);
@@ -856,9 +845,9 @@ void FBXMeshData::triangulate_polygon(Ref<SurfaceTool> st, Vector<int> p_polygon
 			is_simple_convex = true;
 			Vector3 first_vec;
 			for (int i = 0; i < polygon_vertex_count; i += 1) {
-				const Vector3 p1 = p_vertices[p_surface_vertex_map[p_polygon_vertex[i]]];
-				const Vector3 p2 = p_vertices[p_surface_vertex_map[p_polygon_vertex[(i + 1) % polygon_vertex_count]]];
-				const Vector3 p3 = p_vertices[p_surface_vertex_map[p_polygon_vertex[(i + 2) % polygon_vertex_count]]];
+				const Vector3 p1 = p_vertices[surface->vertices_map[p_polygon_vertex[i]]];
+				const Vector3 p2 = p_vertices[surface->vertices_map[p_polygon_vertex[(i + 1) % polygon_vertex_count]]];
+				const Vector3 p3 = p_vertices[surface->vertices_map[p_polygon_vertex[(i + 2) % polygon_vertex_count]]];
 
 				const Vector3 edge1 = p1 - p2;
 				const Vector3 edge2 = p3 - p2;
@@ -893,7 +882,7 @@ void FBXMeshData::triangulate_polygon(Ref<SurfaceTool> st, Vector<int> p_polygon
 
 		std::vector<Vector3> poly_vertices(polygon_vertex_count);
 		for (int i = 0; i < polygon_vertex_count; i += 1) {
-			poly_vertices[i] = p_vertices[p_surface_vertex_map[p_polygon_vertex[i]]];
+			poly_vertices[i] = p_vertices[surface->vertices_map[p_polygon_vertex[i]]];
 		}
 
 		const Vector3 poly_norm = get_poly_normal(poly_vertices);
diff --git a/modules/fbx/data/fbx_mesh_data.h b/modules/fbx/data/fbx_mesh_data.h
index 77510ff2ec..575f833584 100644
--- a/modules/fbx/data/fbx_mesh_data.h
+++ b/modules/fbx/data/fbx_mesh_data.h
@@ -32,6 +32,8 @@
 #define FBX_MESH_DATA_H
 
 #include "core/templates/hash_map.h"
+#include "core/templates/local_vector.h"
+#include "core/templates/ordered_hash_map.h"
 #include "editor/import/resource_importer_scene.h"
 #include "editor/import/scene_importer_mesh_node_3d.h"
 #include "scene/3d/mesh_instance_3d.h"
@@ -47,6 +49,20 @@ struct FBXMeshData;
 struct FBXBone;
 struct ImportState;
 
+typedef int Vertex;
+typedef int SurfaceId;
+typedef int PolygonId;
+typedef int DataIndex;
+
+struct SurfaceData {
+	Ref<SurfaceTool> surface_tool; // Receives the indices emitted by triangulate_polygon().
+	OrderedHashMap<Vertex, int> lookup_table; // Proposed fix: make lookup_table[vertex_id] return that vertex's index in vertices_map.
+	LocalVector<Vertex> vertices_map; // Must stay in insertion order; find() on it is slow.
+	Ref<Material> material;
+	HashMap<PolygonId, Vector<DataIndex>> surface_polygon_vertex; // Per-polygon index lists, each handed to triangulate_polygon().
+	Array morphs;
+};
+
 struct VertexWeightMapping {
 	Vector<real_t> weights;
 	Vector<int> bones;
@@ -127,7 +143,7 @@ private:
 			const Vector3 &p_morph_value = Vector3(),
 			const Vector3 &p_morph_normal = Vector3());
 
-	void triangulate_polygon(Ref<SurfaceTool> st, Vector<int> p_polygon_vertex, Vector<int> p_surface_vertex_map, const std::vector<Vector3> &p_vertices) const;
+	void triangulate_polygon(SurfaceData *surface, const Vector<int> &p_polygon_vertex, const std::vector<Vector3> &p_vertices) const;
 
 	/// This function is responsible to convert the FBX polygon vertex to
 	/// vertex index.
diff --git a/modules/fbx/data/pivot_transform.cpp b/modules/fbx/data/pivot_transform.cpp
index 1895af6f9f..f4055c830f 100644
--- a/modules/fbx/data/pivot_transform.cpp
+++ b/modules/fbx/data/pivot_transform.cpp
@@ -33,7 +33,7 @@
 #include "tools/import_utils.h"
 
 void PivotTransform::ReadTransformChain() {
-	const FBXDocParser::PropertyTable *props = fbx_model->Props();
+	const FBXDocParser::PropertyTable *props = fbx_model;
 	const FBXDocParser::Model::RotOrder &rot = fbx_model->RotationOrder();
 	const FBXDocParser::TransformInheritance &inheritType = fbx_model->InheritType();
 	inherit_type = inheritType; // copy the inherit type we need it in the second step.
diff --git a/modules/fbx/editor_scene_importer_fbx.cpp b/modules/fbx/editor_scene_importer_fbx.cpp
index 55d524883f..b23a58a414 100644
--- a/modules/fbx/editor_scene_importer_fbx.cpp
+++ b/modules/fbx/editor_scene_importer_fbx.cpp
@@ -44,7 +44,6 @@
 #include "scene/3d/bone_attachment_3d.h"
 #include "scene/3d/camera_3d.h"
 #include "scene/3d/light_3d.h"
-#include "scene/3d/mesh_instance_3d.h"
 #include "scene/main/node.h"
 #include "scene/resources/material.h"
 
@@ -121,15 +120,27 @@ Node3D *EditorSceneImporterFBX::import_scene(const String &p_path, uint32_t p_fl
 
 		print_verbose("[doc] opening fbx file: " + p_path);
 		print_verbose("[doc] fbx header: " + fbx_header_string);
+		bool corrupt = false;
 
 		// safer to check this way as there can be different formatted headers
 		if (fbx_header_string.find("Kaydara FBX Binary", 0) != -1) {
 			is_binary = true;
 			print_verbose("[doc] is binary");
-			FBXDocParser::TokenizeBinary(tokens, (const char *)data.ptrw(), (size_t)data.size());
+
+			FBXDocParser::TokenizeBinary(tokens, (const char *)data.ptrw(), (size_t)data.size(), corrupt);
+
 		} else {
 			print_verbose("[doc] is ascii");
-			FBXDocParser::Tokenize(tokens, (const char *)data.ptrw(), (size_t)data.size());
+			FBXDocParser::Tokenize(tokens, (const char *)data.ptrw(), (size_t)data.size(), corrupt);
+		}
+
+		if (corrupt) {
+			for (FBXDocParser::TokenPtr token : tokens) {
+				delete token;
+			}
+			tokens.clear();
+			ERR_PRINT(vformat("Cannot import FBX file: %s the file is corrupt so we safely exited parsing the file.", p_path));
+			return memnew(Node3D);
 		}
 
 		// The import process explained:
@@ -141,6 +152,16 @@ Node3D *EditorSceneImporterFBX::import_scene(const String &p_path, uint32_t p_fl
 		// use this information to construct a very rudimentary
 		// parse-tree representing the FBX scope structure
 		FBXDocParser::Parser parser(tokens, is_binary);
+
+		if (parser.IsCorrupt()) {
+			for (FBXDocParser::TokenPtr token : tokens) {
+				delete token;
+			}
+			tokens.clear();
+			ERR_PRINT(vformat("Cannot import FBX file: %s the file is corrupt so we safely exited parsing the file.", p_path));
+			return memnew(Node3D);
+		}
+
 		FBXDocParser::ImportSettings settings;
 		settings.strictMode = false;
 
@@ -153,12 +174,10 @@ Node3D *EditorSceneImporterFBX::import_scene(const String &p_path, uint32_t p_fl
 		// safety for version handling
 		if (doc.IsSafeToImport()) {
 			bool is_blender_fbx = false;
-			//const FBXDocParser::PropertyPtr app_vendor = p_document->GlobalSettingsPtr()->Props()
-			//	p_document->Creator()
-			const FBXDocParser::PropertyTable *import_props = doc.GetMetadataProperties();
-			const FBXDocParser::PropertyPtr app_name = import_props->Get("Original|ApplicationName");
-			const FBXDocParser::PropertyPtr app_vendor = import_props->Get("Original|ApplicationVendor");
-			const FBXDocParser::PropertyPtr app_version = import_props->Get("Original|ApplicationVersion");
+			const FBXDocParser::PropertyTable &import_props = doc.GetMetadataProperties();
+			const FBXDocParser::PropertyPtr app_name = import_props.Get("Original|ApplicationName");
+			const FBXDocParser::PropertyPtr app_vendor = import_props.Get("Original|ApplicationVendor");
+			const FBXDocParser::PropertyPtr app_version = import_props.Get("Original|ApplicationVersion");
 			//
 			if (app_name) {
 				const FBXDocParser::TypedProperty<std::string> *app_name_string = dynamic_cast<const FBXDocParser::TypedProperty<std::string> *>(app_name);
@@ -200,6 +219,11 @@ Node3D *EditorSceneImporterFBX::import_scene(const String &p_path, uint32_t p_fl
 			return spatial;
 
 		} else {
+			for (FBXDocParser::TokenPtr token : tokens) {
+				delete token;
+			}
+			tokens.clear();
+
 			ERR_PRINT(vformat("Cannot import FBX file: %s. It uses file format %d which is unsupported by Godot. Please re-export it or convert it to a newer format.", p_path, doc.FBXVersion()));
 		}
 	}
@@ -892,7 +916,7 @@ Node3D *EditorSceneImporterFBX::_generate_scene(
 						uint64_t target_id = target->ID();
 						String target_name = ImportUtils::FBXNodeToName(target->Name());
 
-						const FBXDocParser::PropertyTable *properties = curve_node->Props();
+						const FBXDocParser::PropertyTable *properties = curve_node;
 						bool got_x = false, got_y = false, got_z = false;
 						float offset_x = FBXDocParser::PropertyGet<float>(properties, "d|X", got_x);
 						float offset_y = FBXDocParser::PropertyGet<float>(properties, "d|Y", got_y);
@@ -1047,7 +1071,7 @@ Node3D *EditorSceneImporterFBX::_generate_scene(
 
 						Ref<FBXNode> target_node = state.fbx_target_map[target_id];
 						const FBXDocParser::Model *model = target_node->fbx_model;
-						const FBXDocParser::PropertyTable *props = model->Props();
+						const FBXDocParser::PropertyTable *props = model; // Implicit upcast: the model object is itself the property table.
 
 						Map<StringName, FBXTrack> &track_data = track->value();
 						FBXTrack &translation_keys = track_data[StringName("T")];
diff --git a/modules/fbx/fbx_parser/FBXAnimation.cpp b/modules/fbx/fbx_parser/FBXAnimation.cpp
index 4ab5edebb1..1690df6943 100644
--- a/modules/fbx/fbx_parser/FBXAnimation.cpp
+++ b/modules/fbx/fbx_parser/FBXAnimation.cpp
@@ -130,9 +130,7 @@ AnimationCurve::~AnimationCurve() {
 AnimationCurveNode::AnimationCurveNode(uint64_t id, const ElementPtr element, const std::string &name,
 		const Document &doc, const char *const *target_prop_whitelist /*= NULL*/,
 		size_t whitelist_size /*= 0*/) :
-		Object(id, element, name), doc(doc) {
-	const ScopePtr sc = GetRequiredScope(element);
-
+		Object(id, element, name), target(), doc(doc) {
 	// find target node
 	const char *whitelist[] = { "Model", "NodeAttribute", "Deformer" };
 	const std::vector<const Connection *> &conns = doc.GetConnectionsBySourceSequenced(ID(), whitelist, 3);
@@ -154,8 +152,6 @@ AnimationCurveNode::AnimationCurveNode(uint64_t id, const ElementPtr element, co
 		prop = con->PropertyName();
 		break;
 	}
-
-	props = GetPropertyTable(doc, "AnimationCurveNode.FbxAnimCurveNode", element, sc, false);
 }
 
 // ------------------------------------------------------------------------------------------------
@@ -187,10 +183,6 @@ const AnimationMap &AnimationCurveNode::Curves() const {
 // ------------------------------------------------------------------------------------------------
 AnimationLayer::AnimationLayer(uint64_t id, const ElementPtr element, const std::string &name, const Document &doc) :
 		Object(id, element, name), doc(doc) {
-	const ScopePtr sc = GetRequiredScope(element);
-
-	// note: the props table here bears little importance and is usually absent
-	props = GetPropertyTable(doc, "AnimationLayer.FbxAnimLayer", element, sc, true);
 }
 
 // ------------------------------------------------------------------------------------------------
@@ -248,11 +240,6 @@ const AnimationCurveNodeList AnimationLayer::Nodes(const char *const *target_pro
 // ------------------------------------------------------------------------------------------------
 AnimationStack::AnimationStack(uint64_t id, const ElementPtr element, const std::string &name, const Document &doc) :
 		Object(id, element, name) {
-	const ScopePtr sc = GetRequiredScope(element);
-
-	// note: we don't currently use any of these properties so we shouldn't bother if it is missing
-	props = GetPropertyTable(doc, "AnimationStack.FbxAnimStack", element, sc, true);
-
 	// resolve attached animation layers
 	const std::vector<const Connection *> &conns = doc.GetConnectionsByDestinationSequenced(ID(), "AnimationLayer");
 	layers.reserve(conns.size());
@@ -282,9 +269,5 @@ AnimationStack::AnimationStack(uint64_t id, const ElementPtr element, const std:
 
 // ------------------------------------------------------------------------------------------------
 AnimationStack::~AnimationStack() {
-	if (props != nullptr) {
-		delete props;
-		props = nullptr;
-	}
 }
 } // namespace FBXDocParser
diff --git a/modules/fbx/fbx_parser/FBXBinaryTokenizer.cpp b/modules/fbx/fbx_parser/FBXBinaryTokenizer.cpp
index 1d2b7765c5..1eee10b251 100644
--- a/modules/fbx/fbx_parser/FBXBinaryTokenizer.cpp
+++ b/modules/fbx/fbx_parser/FBXBinaryTokenizer.cpp
@@ -130,6 +130,7 @@ Token::Token(const char *sbegin, const char *send, TokenType type, size_t offset
 		line(offset),
 		column(BINARY_MARKER) {
 #ifdef DEBUG_ENABLED
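+	// NOTE(review): the original author flagged this debug-only copy of the token text as undesirable; the reason is not recorded.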
 	contents = std::string(sbegin, static_cast<size_t>(send - sbegin));
 #endif
 	// calc length
@@ -232,9 +233,11 @@ unsigned int ReadString(const char *&sbegin_out, const char *&send_out, const ch
 }
 
 // ------------------------------------------------------------------------------------------------
-void ReadData(const char *&sbegin_out, const char *&send_out, const char *input, const char *&cursor, const char *end) {
+void ReadData(const char *&sbegin_out, const char *&send_out, const char *input, const char *&cursor, const char *end, bool &corrupt) {
 	if (Offset(cursor, end) < 1) {
 		TokenizeError("cannot ReadData, out of bounds reading length", input, cursor);
+		corrupt = true;
+		return;
 	}
 
 	const char type = *cursor;
@@ -328,9 +331,7 @@ void ReadData(const char *&sbegin_out, const char *&send_out, const char *input,
 			}
 			cursor += comp_len;
 			break;
-		}
-
-			// string
+		} // The string case follows.
 		case 'S': {
 			const char *sb, *se;
 			// 0 characters can legally happen in such strings
@@ -338,11 +339,15 @@ void ReadData(const char *&sbegin_out, const char *&send_out, const char *input,
 			break;
 		}
 		default:
+			corrupt = true; // must exit
 			TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1), input, cursor);
+			return;
 	}
 
 	if (cursor > end) {
+		corrupt = true; // must exit
 		TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1), input, cursor);
+		return;
 	}
 
 	// the type code is contained in the returned range
@@ -350,7 +355,7 @@ void ReadData(const char *&sbegin_out, const char *&send_out, const char *input,
 }
 
 // ------------------------------------------------------------------------------------------------
-bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor, const char *end, bool const is64bits) {
+bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor, const char *end, bool const is64bits, bool &corrupt) {
 	// the first word contains the offset at which this block ends
 	const uint64_t end_offset = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);
 
@@ -364,8 +369,12 @@ bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor,
 
 	if (end_offset > Offset(input, end)) {
 		TokenizeError("block offset is out of range", input, cursor);
+		corrupt = true;
+		return false;
 	} else if (end_offset < Offset(input, cursor)) {
 		TokenizeError("block offset is negative out of range", input, cursor);
+		corrupt = true;
+		return false;
 	}
 
 	// the second data word contains the number of properties in the scope
@@ -375,7 +384,7 @@ bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor,
 	const uint64_t prop_length = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);
 
 	// now comes the name of the scope/key
-	const char *sbeg, *send;
+	const char *sbeg = nullptr, *send = nullptr;
 	ReadString(sbeg, send, input, cursor, end);
 
 	output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor)));
@@ -383,7 +392,10 @@ bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor,
 	// now come the individual properties
 	const char *begin_cursor = cursor;
 	for (unsigned int i = 0; i < prop_count; ++i) {
-		ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);
+		ReadData(sbeg, send, input, cursor, begin_cursor + prop_length, corrupt);
+		if (corrupt) {
+			return false;
+		}
 
 		output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor)));
 
@@ -394,6 +406,8 @@ bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor,
 
 	if (Offset(begin_cursor, cursor) != prop_length) {
 		TokenizeError("property length not reached, something is wrong", input, cursor);
+		corrupt = true;
+		return false;
 	}
 
 	// at the end of each nested block, there is a NUL record to indicate
@@ -410,13 +424,18 @@ bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor,
 
 		// XXX this is vulnerable to stack overflowing ..
 		while (Offset(input, cursor) < end_offset - sentinel_block_length) {
-			ReadScope(output_tokens, input, cursor, input + end_offset - sentinel_block_length, is64bits);
+			ReadScope(output_tokens, input, cursor, input + end_offset - sentinel_block_length, is64bits, corrupt);
+			if (corrupt) {
+				return false;
+			}
 		}
 		output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor)));
 
 		for (unsigned int i = 0; i < sentinel_block_length; ++i) {
 			if (cursor[i] != '\0') {
 				TokenizeError("failed to read nested block sentinel, expected all bytes to be 0", input, cursor);
+				corrupt = true;
+				return false;
 			}
 		}
 		cursor += sentinel_block_length;
@@ -424,6 +443,8 @@ bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor,
 
 	if (Offset(input, cursor) != end_offset) {
 		TokenizeError("scope length not reached, something is wrong", input, cursor);
+		corrupt = true;
+		return false;
 	}
 
 	return true;
@@ -432,7 +453,10 @@ bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor,
 
 // ------------------------------------------------------------------------------------------------
 // TODO: Test FBX Binary files newer than the 7500 version to check if the 64 bits address behaviour is consistent
-void TokenizeBinary(TokenList &output_tokens, const char *input, size_t length) {
+// Tokenizes a binary FBX buffer into output_tokens; `corrupt` is set when the data cannot be tokenized.
+void TokenizeBinary(TokenList &output_tokens, const char *input, size_t length, bool &corrupt) {
 	if (length < 0x1b) {
-		//TokenizeError("file is too short",0);
+		// File is too short: report it through `corrupt` so the caller aborts the import.
+		corrupt = true;
+		return;
 	}
@@ -459,7 +483,7 @@ void TokenizeBinary(TokenList &output_tokens, const char *input, size_t length)
 	const bool is64bits = version >= 7500;
 	const char *end = input + length;
 	while (cursor < end) {
-		if (!ReadScope(output_tokens, input, cursor, input + length, is64bits)) {
+		if (!ReadScope(output_tokens, input, cursor, input + length, is64bits, corrupt)) {
 			break;
 		}
 	}
diff --git a/modules/fbx/fbx_parser/FBXDeformer.cpp b/modules/fbx/fbx_parser/FBXDeformer.cpp
index 4b774e6b2a..039718ae15 100644
--- a/modules/fbx/fbx_parser/FBXDeformer.cpp
+++ b/modules/fbx/fbx_parser/FBXDeformer.cpp
@@ -89,10 +89,6 @@ using namespace Util;
 // ------------------------------------------------------------------------------------------------
 Deformer::Deformer(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name) :
 		Object(id, element, name) {
-	const ScopePtr sc = GetRequiredScope(element);
-
-	const std::string &classname = ParseTokenAsString(GetRequiredToken(element, 2));
-	props = GetPropertyTable(doc, "Deformer.Fbx" + classname, element, sc, true);
 }
 
 // ------------------------------------------------------------------------------------------------
@@ -101,10 +97,6 @@ Deformer::~Deformer() {
 
 Constraint::Constraint(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name) :
 		Object(id, element, name) {
-	const ScopePtr sc = GetRequiredScope(element);
-	const std::string &classname = ParseTokenAsString(GetRequiredToken(element, 2));
-	// used something.fbx as this is a cache name.
-	props = GetPropertyTable(doc, "Something.Fbx" + classname, element, sc, true);
 }
 
 Constraint::~Constraint() {
diff --git a/modules/fbx/fbx_parser/FBXDocument.cpp b/modules/fbx/fbx_parser/FBXDocument.cpp
index d156db201b..bb85d6ff7c 100644
--- a/modules/fbx/fbx_parser/FBXDocument.cpp
+++ b/modules/fbx/fbx_parser/FBXDocument.cpp
@@ -228,7 +228,7 @@ ObjectPtr LazyObject::LoadObject() {
 
 // ------------------------------------------------------------------------------------------------
 Object::Object(uint64_t id, const ElementPtr element, const std::string &name) :
-		element(element), name(name), id(id) {
+		PropertyTable(element), element(element), name(name), id(id) {
 }
 
 // ------------------------------------------------------------------------------------------------
@@ -237,17 +237,13 @@ Object::~Object() {
 }
 
 // ------------------------------------------------------------------------------------------------
-FileGlobalSettings::FileGlobalSettings(const Document &doc, const PropertyTable *props) :
-		props(props), doc(doc) {
+FileGlobalSettings::FileGlobalSettings(const Document &doc) :
+		PropertyTable(), doc(doc) {
 	// empty
 }
 
 // ------------------------------------------------------------------------------------------------
 FileGlobalSettings::~FileGlobalSettings() {
-	if (props != nullptr) {
-		delete props;
-		props = nullptr;
-	}
 }
 
 // ------------------------------------------------------------------------------------------------
@@ -287,15 +283,12 @@ Document::~Document() {
 		delete v.second;
 	}
 
-	if (metadata_properties != nullptr) {
-		delete metadata_properties;
-	}
 	// clear globals import pointer
 	globals.reset();
 }
 
 // ------------------------------------------------------------------------------------------------
-static const unsigned int LowerSupportedVersion = 7300;
+static const unsigned int LowerSupportedVersion = 7100;
 static const unsigned int UpperSupportedVersion = 7700;
 
 bool Document::ReadHeader() {
@@ -306,6 +299,11 @@ bool Document::ReadHeader() {
 		DOMError("no FBXHeaderExtension dictionary found");
 	}
 
+	if (parser.IsCorrupt()) {
+		DOMError("File is corrupt");
+		return false;
+	}
+
 	const ScopePtr shead = ehead->Compound();
 	fbxVersion = ParseTokenAsInt(GetRequiredToken(GetRequiredElement(shead, "FBXVersion", ehead), 0));
 
@@ -325,18 +323,11 @@ bool Document::ReadHeader() {
 		creator = ParseTokenAsString(GetRequiredToken(ecreator, 0));
 	}
 
-	//
 	// Scene Info
-	//
-
 	const ElementPtr scene_info = shead->GetElement("SceneInfo");
 
 	if (scene_info) {
-		PropertyTable *fileExportProps = const_cast<PropertyTable *>(GetPropertyTable(*this, "", scene_info, scene_info->Compound(), true));
-
-		if (fileExportProps) {
-			metadata_properties = fileExportProps;
-		}
+		metadata_properties.Setup(scene_info);
 	}
 
 	const ElementPtr etimestamp = shead->GetElement("CreationTimeStamp");
@@ -358,23 +349,7 @@ bool Document::ReadHeader() {
 void Document::ReadGlobalSettings() {
 	ERR_FAIL_COND_MSG(globals != nullptr, "Global settings is already setup this is a serious error and should be reported");
 
-	const ScopePtr sc = parser.GetRootScope();
-	const ElementPtr ehead = sc->GetElement("GlobalSettings");
-	if (nullptr == ehead || !ehead->Compound()) {
-		DOMWarning("no GlobalSettings dictionary found");
-		globals = std::make_shared<FileGlobalSettings>(*this, new PropertyTable());
-		return;
-	}
-
-	const PropertyTable *props = GetPropertyTable(*this, "", ehead, ehead->Compound(), true);
-
-	//double v = PropertyGet<float>( *props, std::string("UnitScaleFactor"), 1.0 );
-
-	if (!props) {
-		DOMError("GlobalSettings dictionary contains no property table");
-	}
-
-	globals = std::make_shared<FileGlobalSettings>(*this, props);
+	globals = std::make_shared<FileGlobalSettings>(*this);
 }
 
 // ------------------------------------------------------------------------------------------------
@@ -445,58 +420,6 @@ void Document::ReadObjects() {
 
 // ------------------------------------------------------------------------------------------------
 void Document::ReadPropertyTemplates() {
-	const ScopePtr sc = parser.GetRootScope();
-	// read property templates from "Definitions" section
-	const ElementPtr edefs = sc->GetElement("Definitions");
-	if (!edefs || !edefs->Compound()) {
-		DOMWarning("no Definitions dictionary found");
-		return;
-	}
-
-	const ScopePtr sdefs = edefs->Compound();
-	const ElementCollection otypes = sdefs->GetCollection("ObjectType");
-	for (ElementMap::const_iterator it = otypes.first; it != otypes.second; ++it) {
-		const ElementPtr el = (*it).second;
-		const ScopePtr sc_2 = el->Compound();
-		if (!sc_2) {
-			DOMWarning("expected nested scope in ObjectType, ignoring", el);
-			continue;
-		}
-
-		const TokenList &tok = el->Tokens();
-		if (tok.empty()) {
-			DOMWarning("expected name for ObjectType element, ignoring", el);
-			continue;
-		}
-
-		const std::string &oname = ParseTokenAsString(tok[0]);
-
-		const ElementCollection templs = sc_2->GetCollection("PropertyTemplate");
-		for (ElementMap::const_iterator iter = templs.first; iter != templs.second; ++iter) {
-			const ElementPtr el_2 = (*iter).second;
-			const ScopePtr sc_3 = el_2->Compound();
-			if (!sc_3) {
-				DOMWarning("expected nested scope in PropertyTemplate, ignoring", el);
-				continue;
-			}
-
-			const TokenList &tok_2 = el_2->Tokens();
-			if (tok_2.empty()) {
-				DOMWarning("expected name for PropertyTemplate element, ignoring", el);
-				continue;
-			}
-
-			const std::string &pname = ParseTokenAsString(tok_2[0]);
-
-			const ElementPtr Properties70 = sc_3->GetElement("Properties70");
-			if (Properties70) {
-				// PropertyTable(const ElementPtr element, const PropertyTable* templateProps);
-				const PropertyTable *props = new PropertyTable(Properties70, nullptr);
-
-				templates[oname + "." + pname] = props;
-			}
-		}
-	}
 }
 
 // ------------------------------------------------------------------------------------------------
diff --git a/modules/fbx/fbx_parser/FBXDocument.h b/modules/fbx/fbx_parser/FBXDocument.h
index 20e635a6a4..49b7c11c31 100644
--- a/modules/fbx/fbx_parser/FBXDocument.h
+++ b/modules/fbx/fbx_parser/FBXDocument.h
@@ -130,7 +130,7 @@ private:
 };
 
 /** Base class for in-memory (DOM) representations of FBX objects */
-class Object {
+class Object : public PropertyTable {
 public:
 	Object(uint64_t id, const ElementPtr element, const std::string &name);
 
@@ -149,9 +149,9 @@ public:
 	}
 
 protected:
-	const ElementPtr element;
+	const ElementPtr element = nullptr;
 	const std::string name;
-	const uint64_t id = 0;
+	const uint64_t id;
 };
 
 /** DOM class for generic FBX NoteAttribute blocks. NoteAttribute's just hold a property table,
@@ -159,22 +159,13 @@ protected:
 class NodeAttribute : public Object {
 public:
 	NodeAttribute(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name);
-
 	virtual ~NodeAttribute();
-
-	const PropertyTable *Props() const {
-		return props;
-	}
-
-private:
-	const PropertyTable *props;
 };
 
 /** DOM base class for FBX camera settings attached to a node */
 class CameraSwitcher : public NodeAttribute {
 public:
 	CameraSwitcher(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name);
-
 	virtual ~CameraSwitcher();
 
 	int CameraID() const {
@@ -190,26 +181,26 @@ public:
 	}
 
 private:
-	int cameraId;
+	int cameraId = 0;
 	std::string cameraName;
 	std::string cameraIndexName;
 };
 
 #define fbx_stringize(a) #a
 
-#define fbx_simple_property(name, type, default_value)                           \
-	type name() const {                                                          \
-		return PropertyGet<type>(Props(), fbx_stringize(name), (default_value)); \
+#define fbx_simple_property(name, type, default_value)                        \
+	type name() const {                                                       \
+		return PropertyGet<type>(this, fbx_stringize(name), (default_value)); \
 	}
 
 // XXX improve logging
-#define fbx_simple_enum_property(name, type, default_value)                                               \
-	type name() const {                                                                                   \
-		const int ival = PropertyGet<int>(Props(), fbx_stringize(name), static_cast<int>(default_value)); \
-		if (ival < 0 || ival >= AI_CONCAT(type, _MAX)) {                                                  \
-			return static_cast<type>(default_value);                                                      \
-		}                                                                                                 \
-		return static_cast<type>(ival);                                                                   \
+#define fbx_simple_enum_property(name, type, default_value)                                            \
+	type name() const {                                                                                \
+		const int ival = PropertyGet<int>(this, fbx_stringize(name), static_cast<int>(default_value)); \
+		if (ival < 0 || ival >= AI_CONCAT(type, _MAX)) {                                               \
+			return static_cast<type>(default_value);                                                   \
+		}                                                                                              \
+		return static_cast<type>(ival);                                                                \
 	}
 
 class FbxPoseNode;
@@ -256,7 +247,7 @@ public:
 	}
 
 private:
-	uint64_t target_id;
+	uint64_t target_id = 0;
 	Transform transform;
 };
 
@@ -264,7 +255,6 @@ private:
 class Camera : public NodeAttribute {
 public:
 	Camera(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name);
-
 	virtual ~Camera();
 
 	fbx_simple_property(Position, Vector3, Vector3(0, 0, 0));
@@ -380,7 +370,6 @@ public:
 	};
 
 	Model(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name);
-
 	virtual ~Model();
 
 	fbx_simple_property(QuaternionInterpolate, int, 0);
@@ -466,10 +455,6 @@ public:
 		return culling;
 	}
 
-	const PropertyTable *Props() const {
-		return props;
-	}
-
 	/** Get material links */
 	const std::vector<const Material *> &GetMaterials() const {
 		return materials;
@@ -498,13 +483,11 @@ private:
 
 	std::string shading;
 	std::string culling;
-	const PropertyTable *props = nullptr;
 };
 
 class ModelLimbNode : public Model {
 public:
 	ModelLimbNode(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name);
-
 	virtual ~ModelLimbNode();
 };
 
@@ -512,7 +495,6 @@ public:
 class Texture : public Object {
 public:
 	Texture(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name);
-
 	virtual ~Texture();
 
 	const std::string &Type() const {
@@ -539,10 +521,6 @@ public:
 		return uvScaling;
 	}
 
-	const PropertyTable *Props() const {
-		return props;
-	}
-
 	// return a 4-tuple
 	const unsigned int *Crop() const {
 		return crop;
@@ -560,10 +538,8 @@ private:
 	std::string relativeFileName;
 	std::string fileName;
 	std::string alphaSource;
-	const PropertyTable *props = nullptr;
 
 	unsigned int crop[4] = { 0 };
-
 	const Video *media = nullptr;
 };
 
@@ -626,8 +602,8 @@ public:
 
 private:
 	std::vector<const Texture *> textures;
-	BlendMode blendMode;
-	float alpha;
+	BlendMode blendMode = BlendMode::BlendMode_Additive;
+	float alpha = 0;
 };
 
 typedef std::map<std::string, const Texture *> TextureMap;
@@ -656,10 +632,6 @@ public:
 		return relativeFileName;
 	}
 
-	const PropertyTable *Props() const {
-		return props;
-	}
-
 	const uint8_t *Content() const {
 		return content;
 	}
@@ -687,7 +659,6 @@ private:
 	std::string type;
 	std::string relativeFileName;
 	std::string fileName;
-	const PropertyTable *props = nullptr;
 
 	uint64_t contentLength = 0;
 	uint8_t *content = nullptr;
@@ -708,10 +679,6 @@ public:
 		return multilayer;
 	}
 
-	const PropertyTable *Props() const {
-		return props;
-	}
-
 	const TextureMap &Textures() const {
 		return textures;
 	}
@@ -722,8 +689,7 @@ public:
 
 private:
 	std::string shading;
-	bool multilayer;
-	const PropertyTable *props;
+	bool multilayer = false;
 
 	TextureMap textures;
 	LayeredTextureMap layeredTextures;
@@ -791,10 +757,6 @@ public:
 
 	virtual ~AnimationCurveNode();
 
-	const PropertyTable *Props() const {
-		return props;
-	}
-
 	const AnimationMap &Curves() const;
 
 	/** Object the curve is assigned to, this can be NULL if the
@@ -819,7 +781,6 @@ public:
 
 private:
 	Object *target = nullptr;
-	const PropertyTable *props;
 	mutable AnimationMap curves;
 	std::string prop;
 	const Document &doc;
@@ -837,18 +798,12 @@ public:
 	AnimationLayer(uint64_t id, const ElementPtr element, const std::string &name, const Document &doc);
 	virtual ~AnimationLayer();
 
-	const PropertyTable *Props() const {
-		//ai_assert(props.get());
-		return props;
-	}
-
 	/* the optional white list specifies a list of property names for which the caller
     wants animations for. Curves not matching this list will not be added to the
     animation layer. */
 	const AnimationCurveNodeList Nodes(const char *const *target_prop_whitelist = nullptr, size_t whitelist_size = 0) const;
 
 private:
-	const PropertyTable *props;
 	const Document &doc;
 };
 
@@ -863,16 +818,11 @@ public:
 	fbx_simple_property(ReferenceStart, int64_t, 0L);
 	fbx_simple_property(ReferenceStop, int64_t, 0L);
 
-	const PropertyTable *Props() const {
-		return props;
-	}
-
 	const AnimationLayerList &Layers() const {
 		return layers;
 	}
 
 private:
-	const PropertyTable *props = nullptr;
 	AnimationLayerList layers;
 };
 
@@ -881,14 +831,6 @@ class Deformer : public Object {
 public:
 	Deformer(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name);
 	virtual ~Deformer();
-
-	const PropertyTable *Props() const {
-		//ai_assert(props.get());
-		return props;
-	}
-
-private:
-	const PropertyTable *props;
 };
 
 /** Constraints are from Maya they can help us with BoneAttachments :) **/
@@ -896,9 +838,6 @@ class Constraint : public Object {
 public:
 	Constraint(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name);
 	virtual ~Constraint();
-
-private:
-	const PropertyTable *props;
 };
 
 typedef std::vector<float> WeightArray;
@@ -924,7 +863,7 @@ public:
 	}
 
 private:
-	float percent;
+	float percent = 0;
 	WeightArray fullWeights;
 	std::vector<const ShapeGeometry *> shapeGeometries;
 };
@@ -1006,7 +945,7 @@ private:
 	Transform transformLink;
 	Transform transformAssociateModel;
 	SkinLinkMode link_mode;
-	bool valid_transformAssociateModel;
+	bool valid_transformAssociateModel = false;
 	const Model *node = nullptr;
 };
 
@@ -1037,8 +976,8 @@ public:
 	}
 
 private:
-	float accuracy;
-	SkinType skinType;
+	float accuracy = 0;
+	SkinType skinType = SkinType::Skin_Linear;
 	std::vector<const Cluster *> clusters;
 };
 
@@ -1087,10 +1026,10 @@ public:
 	}
 
 public:
-	uint64_t insertionOrder;
+	uint64_t insertionOrder = 0;
 	const std::string prop;
 
-	uint64_t src, dest;
+	uint64_t src = 0, dest = 0;
 	const Document &doc;
 };
 
@@ -1105,15 +1044,10 @@ typedef std::multimap<uint64_t, const Connection *> ConnectionMap;
 
 /** DOM class for global document settings, a single instance per document can
  *  be accessed via Document.Globals(). */
-class FileGlobalSettings {
+class FileGlobalSettings : public PropertyTable {
 public:
-	FileGlobalSettings(const Document &doc, const PropertyTable *props);
-
-	~FileGlobalSettings();
-
-	const PropertyTable *Props() const {
-		return props;
-	}
+	FileGlobalSettings(const Document &doc);
+	virtual ~FileGlobalSettings();
 
 	const Document &GetDocument() const {
 		return doc;
@@ -1158,7 +1092,6 @@ public:
 	fbx_simple_property(CustomFrameRate, float, -1.0f);
 
 private:
-	const PropertyTable *props = nullptr;
 	const Document &doc;
 };
 
@@ -1196,7 +1129,7 @@ public:
 		return globals.get();
 	}
 
-	const PropertyTable *GetMetadataProperties() const {
+	const PropertyTable &GetMetadataProperties() const {
 		return metadata_properties;
 	}
 
@@ -1293,7 +1226,7 @@ private:
 	std::vector<uint64_t> materials;
 	std::vector<uint64_t> skins;
 	mutable std::vector<const AnimationStack *> animationStacksResolved;
-	PropertyTable *metadata_properties = nullptr;
+	PropertyTable metadata_properties;
 	std::shared_ptr<FileGlobalSettings> globals = nullptr;
 };
 } // namespace FBXDocParser
diff --git a/modules/fbx/fbx_parser/FBXDocumentUtil.cpp b/modules/fbx/fbx_parser/FBXDocumentUtil.cpp
index df50a32c39..3930e005c3 100644
--- a/modules/fbx/fbx_parser/FBXDocumentUtil.cpp
+++ b/modules/fbx/fbx_parser/FBXDocumentUtil.cpp
@@ -137,36 +137,5 @@ void DOMWarning(const std::string &message, const std::shared_ptr<Element> eleme
 	print_verbose("[FBX-DOM] warning:" + String(message.c_str()));
 }
 
-// ------------------------------------------------------------------------------------------------
-// fetch a property table and the corresponding property template
-const PropertyTable *GetPropertyTable(const Document &doc,
-		const std::string &templateName,
-		const ElementPtr element,
-		const ScopePtr sc,
-		bool no_warn /*= false*/) {
-	// todo: make this an abstraction
-	const ElementPtr Properties70 = sc->GetElement("Properties70");
-	const PropertyTable *templateProps = static_cast<const PropertyTable *>(nullptr);
-
-	if (templateName.length()) {
-		PropertyTemplateMap::const_iterator it = doc.Templates().find(templateName);
-		if (it != doc.Templates().end()) {
-			templateProps = (*it).second;
-		}
-	}
-
-	if (!Properties70 || !Properties70->Compound()) {
-		if (!no_warn) {
-			DOMWarning("property table (Properties70) not found", element);
-		}
-		if (templateProps) {
-			return new const PropertyTable(templateProps);
-		} else {
-			return new const PropertyTable();
-		}
-	}
-
-	return new PropertyTable(Properties70, templateProps);
-}
 } // namespace Util
 } // namespace FBXDocParser
diff --git a/modules/fbx/fbx_parser/FBXDocumentUtil.h b/modules/fbx/fbx_parser/FBXDocumentUtil.h
index daa9de4a33..ba86191c4a 100644
--- a/modules/fbx/fbx_parser/FBXDocumentUtil.h
+++ b/modules/fbx/fbx_parser/FBXDocumentUtil.h
@@ -98,13 +98,6 @@ void DOMWarning(const std::string &message, const Element *element);
 void DOMWarning(const std::string &message, const std::shared_ptr<Token> token);
 void DOMWarning(const std::string &message, const std::shared_ptr<Element> element);
 
-// fetch a property table and the corresponding property template
-const PropertyTable *GetPropertyTable(const Document &doc,
-		const std::string &templateName,
-		const ElementPtr element,
-		const ScopePtr sc,
-		bool no_warn = false);
-
 // ------------------------------------------------------------------------------------------------
 template <typename T>
 const T *ProcessSimpleConnection(const Connection &con,
diff --git a/modules/fbx/fbx_parser/FBXMaterial.cpp b/modules/fbx/fbx_parser/FBXMaterial.cpp
index 219da1b2f4..08fff5714a 100644
--- a/modules/fbx/fbx_parser/FBXMaterial.cpp
+++ b/modules/fbx/fbx_parser/FBXMaterial.cpp
@@ -118,8 +118,6 @@ Material::Material(uint64_t id, const ElementPtr element, const Document &doc, c
 		DOMWarning("shading mode not recognized: " + shading, element);
 	}
 
-	props = GetPropertyTable(doc, templateName, element, sc);
-
 	// resolve texture links
 	const std::vector<const Connection *> &conns = doc.GetConnectionsByDestinationSequenced(ID());
 	for (const Connection *con : conns) {
@@ -163,10 +161,6 @@ Material::Material(uint64_t id, const ElementPtr element, const Document &doc, c
 
 // ------------------------------------------------------------------------------------------------
 Material::~Material() {
-	if (props != nullptr) {
-		delete props;
-		props = nullptr;
-	}
 }
 
 // ------------------------------------------------------------------------------------------------
@@ -219,17 +213,15 @@ Texture::Texture(uint64_t id, const ElementPtr element, const Document &doc, con
 		alphaSource = ParseTokenAsString(GetRequiredToken(Texture_Alpha_Source, 0));
 	}
 
-	props = GetPropertyTable(doc, "Texture.FbxFileTexture", element, sc);
-
 	// 3DS Max and FBX SDK use "Scaling" and "Translation" instead of "ModelUVScaling" and "ModelUVTranslation". Use these properties if available.
-	bool ok;
-	const Vector3 &scaling = PropertyGet<Vector3>(props, "Scaling", ok);
+	bool ok = true;
+	const Vector3 &scaling = PropertyGet<Vector3>(this, "Scaling", ok);
 	if (ok) {
 		uvScaling.x = scaling.x;
 		uvScaling.y = scaling.y;
 	}
 
-	const Vector3 &trans = PropertyGet<Vector3>(props, "Translation", ok);
+	const Vector3 &trans = PropertyGet<Vector3>(this, "Translation", ok);
 	if (ok) {
 		uvTrans.x = trans.x;
 		uvTrans.y = trans.y;
@@ -254,10 +246,6 @@ Texture::Texture(uint64_t id, const ElementPtr element, const Document &doc, con
 }
 
 Texture::~Texture() {
-	if (props != nullptr) {
-		delete props;
-		props = nullptr;
-	}
 }
 
 LayeredTexture::LayeredTexture(uint64_t id, const ElementPtr element, const Document & /*doc*/, const std::string &name) :
@@ -390,18 +378,11 @@ Video::Video(uint64_t id, const ElementPtr element, const Document &doc, const s
 			//									   runtimeError.what());
 		}
 	}
-
-	props = GetPropertyTable(doc, "Video.FbxVideo", element, sc);
 }
 
 Video::~Video() {
 	if (content) {
 		delete[] content;
 	}
-
-	if (props != nullptr) {
-		delete props;
-		props = nullptr;
-	}
 }
 } // namespace FBXDocParser
diff --git a/modules/fbx/fbx_parser/FBXModel.cpp b/modules/fbx/fbx_parser/FBXModel.cpp
index 767994441f..03c9de0c35 100644
--- a/modules/fbx/fbx_parser/FBXModel.cpp
+++ b/modules/fbx/fbx_parser/FBXModel.cpp
@@ -98,16 +98,11 @@ Model::Model(uint64_t id, const ElementPtr element, const Document &doc, const s
 		culling = ParseTokenAsString(GetRequiredToken(Culling, 0));
 	}
 
-	props = GetPropertyTable(doc, "Model.FbxNode", element, sc);
 	ResolveLinks(element, doc);
 }
 
 // ------------------------------------------------------------------------------------------------
 Model::~Model() {
-	if (props != nullptr) {
-		delete props;
-		props = nullptr;
-	}
 }
 
 ModelLimbNode::ModelLimbNode(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name) :
diff --git a/modules/fbx/fbx_parser/FBXNodeAttribute.cpp b/modules/fbx/fbx_parser/FBXNodeAttribute.cpp
index 2749fc9f4d..15184a0f5d 100644
--- a/modules/fbx/fbx_parser/FBXNodeAttribute.cpp
+++ b/modules/fbx/fbx_parser/FBXNodeAttribute.cpp
@@ -84,16 +84,7 @@ using namespace Util;
 
 // ------------------------------------------------------------------------------------------------
 NodeAttribute::NodeAttribute(uint64_t id, const ElementPtr element, const Document &doc, const std::string &name) :
-		Object(id, element, name), props() {
-	const ScopePtr sc = GetRequiredScope(element);
-
-	const std::string &classname = ParseTokenAsString(GetRequiredToken(element, 2));
-
-	// hack on the deriving type but Null/LimbNode attributes are the only case in which
-	// the property table is by design absent and no warning should be generated
-	// for it.
-	const bool is_null_or_limb = !strcmp(classname.c_str(), "Null") || !strcmp(classname.c_str(), "LimbNode");
-	props = GetPropertyTable(doc, "NodeAttribute.Fbx" + classname, element, sc, is_null_or_limb);
+		Object(id, element, name) {
 }
 
 // ------------------------------------------------------------------------------------------------
diff --git a/modules/fbx/fbx_parser/FBXParser.cpp b/modules/fbx/fbx_parser/FBXParser.cpp
index 166d98bb8c..82d532e0b8 100644
--- a/modules/fbx/fbx_parser/FBXParser.cpp
+++ b/modules/fbx/fbx_parser/FBXParser.cpp
@@ -131,6 +131,8 @@ Element::Element(const TokenPtr key_token, Parser &parser) :
 
 			if (!n) {
 				print_error("unexpected end of file, expected bracket, comma or key" + String(parser.LastToken()->StringContents().c_str()));
+				parser.corrupt = true;
+				return;
 			}
 
 			const TokenType ty = n->Type();
@@ -143,6 +145,8 @@ Element::Element(const TokenPtr key_token, Parser &parser) :
 
 			if (ty != TokenType_OPEN_BRACKET && ty != TokenType_CLOSE_BRACKET && ty != TokenType_COMMA && ty != TokenType_KEY) {
 				print_error("unexpected token; expected bracket, comma or key" + String(n->StringContents().c_str()));
+				parser.corrupt = true;
+				return;
 			}
 		}
 
@@ -150,11 +154,17 @@ Element::Element(const TokenPtr key_token, Parser &parser) :
 			compound = new_Scope(parser);
 			parser.scopes.push_back(compound);
 
+			if (parser.corrupt) {
+				return;
+			}
+
 			// current token should be a TOK_CLOSE_BRACKET
 			n = parser.CurrentToken();
 
 			if (n && n->Type() != TokenType_CLOSE_BRACKET) {
 				print_error("expected closing bracket" + String(n->StringContents().c_str()));
+				parser.corrupt = true;
+				return;
 			}
 
 			parser.AdvanceToNextToken();
@@ -173,22 +183,31 @@ Scope::Scope(Parser &parser, bool topLevel) {
 		TokenPtr t = parser.CurrentToken();
 		if (t->Type() != TokenType_OPEN_BRACKET) {
 			print_error("expected open bracket" + String(t->StringContents().c_str()));
+			parser.corrupt = true;
+			return;
 		}
 	}
 
 	TokenPtr n = parser.AdvanceToNextToken();
 	if (n == nullptr) {
 		print_error("unexpected end of file");
+		parser.corrupt = true;
+		return;
 	}
 
 	// note: empty scopes are allowed
 	while (n && n->Type() != TokenType_CLOSE_BRACKET) {
 		if (n->Type() != TokenType_KEY) {
 			print_error("unexpected token, expected TOK_KEY" + String(n->StringContents().c_str()));
+			parser.corrupt = true;
+			return;
 		}
 
 		const std::string str = n->StringContents();
 
+		if (parser.corrupt) {
+			return;
+		}
 		// std::multimap<std::string, ElementPtr> (key and value)
 		elements.insert(ElementMap::value_type(str, new_Element(n, parser)));
 
@@ -216,7 +235,7 @@ Scope::~Scope() {
 
 // ------------------------------------------------------------------------------------------------
 Parser::Parser(const TokenList &tokens, bool is_binary) :
-		tokens(tokens), cursor(tokens.begin()), is_binary(is_binary) {
+		corrupt(false), tokens(tokens), cursor(tokens.begin()), is_binary(is_binary) {
 	root = new_Scope(*this, true);
 	scopes.push_back(root);
 }
@@ -1231,6 +1250,21 @@ ScopePtr GetRequiredScope(const ElementPtr el) {
 }
 
 // ------------------------------------------------------------------------------------------------
+// extract optional compound scope
+ScopePtr GetOptionalScope(const ElementPtr el) {
+	// A missing element is not an error here: the caller simply gets no scope.
+	if (!el) {
+		return nullptr;
+	}
+	const ScopePtr scope = el->Compound();
+	const TokenPtr key = el->KeyToken();
+	if (!key || !scope) {
+		return nullptr; // no key token or no compound scope
+	}
+	return scope;
+}
+
+// ------------------------------------------------------------------------------------------------
 // get token at a particular index
 TokenPtr GetRequiredToken(const ElementPtr el, unsigned int index) {
 	if (el) {
diff --git a/modules/fbx/fbx_parser/FBXParser.h b/modules/fbx/fbx_parser/FBXParser.h
index 37d27d3dca..bfbcb22ffa 100644
--- a/modules/fbx/fbx_parser/FBXParser.h
+++ b/modules/fbx/fbx_parser/FBXParser.h
@@ -199,6 +199,10 @@ public:
 		return is_binary;
 	}
 
+	bool IsCorrupt() const {
+		return corrupt; // true once Scope/Element parsing has flagged a structural error
+	}
+
 private:
 	friend class Scope;
 	friend class Element;
@@ -208,6 +212,7 @@ private:
 	TokenPtr CurrentToken() const;
 
 private:
+	bool corrupt = false;
 	ScopeList scopes;
 	const TokenList &tokens;
 
@@ -249,6 +254,8 @@ bool HasElement(const ScopePtr sc, const std::string &index);
 // extract a required element from a scope, abort if the element cannot be found
 ElementPtr GetRequiredElement(const ScopePtr sc, const std::string &index, const ElementPtr element = nullptr);
 ScopePtr GetRequiredScope(const ElementPtr el); // New in 2020. (less likely to destroy application)
+ScopePtr GetOptionalScope(const ElementPtr el); // New in 2021. (even LESS likely to destroy application now)
+
 ElementPtr GetOptionalElement(const ScopePtr sc, const std::string &index, const ElementPtr element = nullptr);
 // extract required compound scope
 ScopePtr GetRequiredScope(const ElementPtr el);
diff --git a/modules/fbx/fbx_parser/FBXProperties.cpp b/modules/fbx/fbx_parser/FBXProperties.cpp
index 84e71512d6..1b3f29ec04 100644
--- a/modules/fbx/fbx_parser/FBXProperties.cpp
+++ b/modules/fbx/fbx_parser/FBXProperties.cpp
@@ -145,19 +145,33 @@ std::string PeekPropertyName(const Element &element) {
 } // namespace
 
 // ------------------------------------------------------------------------------------------------
-PropertyTable::PropertyTable() {
+PropertyTable::PropertyTable() :
+		element(nullptr) {
 }
 
-// ------------------------------------------------------------------------------------------------
-PropertyTable::PropertyTable(const PropertyTable *templateProps) :
-		templateProps(templateProps), element() {
+// Used when dealing with FBX objects, not metadata.
+PropertyTable::PropertyTable(const ElementPtr element) :
+		element(element) {
+	Setup(element);
 }
 
 // ------------------------------------------------------------------------------------------------
-PropertyTable::PropertyTable(const ElementPtr element, const PropertyTable *templateProps) :
-		templateProps(templateProps), element(element) {
-	const ScopePtr scope = GetRequiredScope(element);
-	ERR_FAIL_COND(!scope);
+PropertyTable::~PropertyTable() {
+	for (PropertyMap::iterator it = props.begin(); it != props.end(); ++it) {
+		delete it->second; // release each property stored in the map
+	}
+}
+
+// Loads the "Properties70" block found under ptr; the table is left untouched when there is none.
+void PropertyTable::Setup(ElementPtr ptr) {
+	const ScopePtr sc = GetRequiredScope(ptr);
+	// sc may be null for a malformed element, so guard it; GetOptionalScope accepts a null element.
+	const ElementPtr Properties70 = sc ? sc->GetElement("Properties70") : ElementPtr();
+	const ScopePtr scope = GetOptionalScope(Properties70);
+	if (!scope) {
+		return; // NOTE: not an error, this is simply an object without a property table.
+	}
+
 	for (const ElementMap::value_type &v : scope->Elements()) {
 		if (v.first != "P") {
 			DOMWarning("expected only P elements in property table", v.second);
@@ -182,13 +196,6 @@ PropertyTable::PropertyTable(const ElementPtr element, const PropertyTable *temp
 }
 
 // ------------------------------------------------------------------------------------------------
-PropertyTable::~PropertyTable() {
-	for (PropertyMap::value_type &v : props) {
-		delete v.second;
-	}
-}
-
-// ------------------------------------------------------------------------------------------------
 PropertyPtr PropertyTable::Get(const std::string &name) const {
 	PropertyMap::const_iterator it = props.find(name);
 	if (it == props.end()) {
@@ -203,10 +210,6 @@ PropertyPtr PropertyTable::Get(const std::string &name) const {
 
 		if (it == props.end()) {
 			// check property template
-			if (templateProps) {
-				return templateProps->Get(name);
-			}
-
 			return nullptr;
 		}
 	}
diff --git a/modules/fbx/fbx_parser/FBXProperties.h b/modules/fbx/fbx_parser/FBXProperties.h
index 0595b25fa7..bfd27ac94e 100644
--- a/modules/fbx/fbx_parser/FBXProperties.h
+++ b/modules/fbx/fbx_parser/FBXProperties.h
@@ -137,36 +137,31 @@ class PropertyTable {
 public:
 	// in-memory property table with no source element
 	PropertyTable();
-	PropertyTable(const PropertyTable *templateProps);
-	PropertyTable(const ElementPtr element, const PropertyTable *templateProps);
-	~PropertyTable();
+	PropertyTable(const ElementPtr element);
+	virtual ~PropertyTable();
 
 	PropertyPtr Get(const std::string &name) const;
+	void Setup(ElementPtr ptr);
 
 	// PropertyTable's need not be coupled with FBX elements so this can be NULL
-	ElementPtr GetElement() const {
+	ElementPtr GetElement() {
 		return element;
 	}
 
-	PropertyMap &GetProperties() const {
+	PropertyMap &GetProperties() {
 		return props;
 	}
 
-	const LazyPropertyMap &GetLazyProperties() const {
+	const LazyPropertyMap &GetLazyProperties() {
 		return lazyProps;
 	}
 
-	const PropertyTable *TemplateProps() const {
-		return templateProps;
-	}
-
 	DirectPropertyMap GetUnparsedProperties() const;
 
 private:
 	LazyPropertyMap lazyProps;
 	mutable PropertyMap props;
-	const PropertyTable *templateProps = nullptr;
-	const ElementPtr element = nullptr;
+	ElementPtr element = nullptr;
 };
 
 // ------------------------------------------------------------------------------------------------
@@ -191,16 +186,11 @@ template <typename T>
 inline T PropertyGet(const PropertyTable *in, const std::string &name, bool &result, bool useTemplate = false) {
 	PropertyPtr prop = in->Get(name);
 	if (nullptr == prop) {
-		if (!useTemplate) {
-			result = false;
-			return T();
-		}
-		const PropertyTable *templ = in->TemplateProps();
-		if (nullptr == templ) {
+		if (nullptr == in) {
 			result = false;
 			return T();
 		}
-		prop = templ->Get(name);
+		prop = in->Get(name);
 		if (nullptr == prop) {
 			result = false;
 			return T();
diff --git a/modules/fbx/fbx_parser/FBXTokenizer.cpp b/modules/fbx/fbx_parser/FBXTokenizer.cpp
index ea4568fe32..81c5b128e8 100644
--- a/modules/fbx/fbx_parser/FBXTokenizer.cpp
+++ b/modules/fbx/fbx_parser/FBXTokenizer.cpp
@@ -141,7 +141,7 @@ void ProcessDataToken(TokenList &output_tokens, const char *&start, const char *
 } // namespace
 
 // ------------------------------------------------------------------------------------------------
-void Tokenize(TokenList &output_tokens, const char *input, size_t length) {
+void Tokenize(TokenList &output_tokens, const char *input, size_t length, bool &corrupt) {
 	// line and column numbers numbers are one-based
 	unsigned int line = 1;
 	unsigned int column = 1;
@@ -185,6 +185,8 @@ void Tokenize(TokenList &output_tokens, const char *input, size_t length) {
 			case '\"':
 				if (token_begin) {
 					TokenizeError("unexpected double-quote", line, column);
+					corrupt = true;
+					return;
 				}
 				token_begin = cur;
 				in_double_quotes = true;
diff --git a/modules/fbx/fbx_parser/FBXTokenizer.h b/modules/fbx/fbx_parser/FBXTokenizer.h
index 1e7e5e6535..184d0fd894 100644
--- a/modules/fbx/fbx_parser/FBXTokenizer.h
+++ b/modules/fbx/fbx_parser/FBXTokenizer.h
@@ -187,7 +187,7 @@ typedef std::vector<TokenPtr> TokenList;
  * @param output_tokens Receives a list of all tokens in the input data.
  * @param input_buffer Textual input buffer to be processed, 0-terminated.
  * @print_error if something goes wrong */
-void Tokenize(TokenList &output_tokens, const char *input, size_t length);
+void Tokenize(TokenList &output_tokens, const char *input, size_t length, bool &corrupt);
 
 /** Tokenizer function for binary FBX files.
  *
@@ -197,7 +197,7 @@ void Tokenize(TokenList &output_tokens, const char *input, size_t length);
  * @param input_buffer Binary input buffer to be processed.
  * @param length Length of input buffer, in bytes. There is no 0-terminal.
  * @print_error if something goes wrong */
-void TokenizeBinary(TokenList &output_tokens, const char *input, size_t length);
+void TokenizeBinary(TokenList &output_tokens, const char *input, size_t length, bool &corrupt);
 } // namespace FBXDocParser
 
 #endif // FBX_TOKENIZER_H
diff --git a/modules/gdscript/gdscript.cpp b/modules/gdscript/gdscript.cpp
index c9c5d00aa5..5f590383d0 100644
--- a/modules/gdscript/gdscript.cpp
+++ b/modules/gdscript/gdscript.cpp
@@ -45,6 +45,10 @@
 #include "gdscript_parser.h"
 #include "gdscript_warning.h"
 
+#ifdef TESTS_ENABLED
+#include "tests/gdscript_test_runner.h"
+#endif
+
 ///////////////////////////
 
 GDScriptNativeClass::GDScriptNativeClass(const StringName &p_name) {
@@ -1766,6 +1770,10 @@ void GDScriptLanguage::init() {
 	for (List<Engine::Singleton>::Element *E = singletons.front(); E; E = E->next()) {
 		_add_global(E->get().name, E->get().ptr);
 	}
+
+#ifdef TESTS_ENABLED
+	GDScriptTests::GDScriptTestRunner::handle_cmdline();
+#endif
 }
 
 String GDScriptLanguage::get_type() const {
diff --git a/modules/gdscript/gdscript_byte_codegen.cpp b/modules/gdscript/gdscript_byte_codegen.cpp
index b553dcede3..89c5f5482b 100644
--- a/modules/gdscript/gdscript_byte_codegen.cpp
+++ b/modules/gdscript/gdscript_byte_codegen.cpp
@@ -47,7 +47,8 @@ uint32_t GDScriptByteCodeGenerator::add_parameter(const StringName &p_name, bool
 }
 
 uint32_t GDScriptByteCodeGenerator::add_local(const StringName &p_name, const GDScriptDataType &p_type) {
-	int stack_pos = increase_stack();
+	int stack_pos = locals.size() + RESERVED_STACK; // Locals are addressed right after the reserved slots.
+	locals.push_back(StackSlot(p_type.builtin_type)); // Record the slot so the next local gets the following position.
 	add_stack_identifier(p_name, stack_pos);
 	return stack_pos;
 }
@@ -66,25 +67,87 @@ uint32_t GDScriptByteCodeGenerator::add_or_get_name(const StringName &p_name) {
 	return get_name_map_pos(p_name);
 }
 
-uint32_t GDScriptByteCodeGenerator::add_temporary() {
-	current_temporaries++;
-	int idx = increase_stack();
-#ifdef DEBUG_ENABLED
-	temp_stack.push_back(idx);
-#endif
-	return idx;
+uint32_t GDScriptByteCodeGenerator::add_temporary(const GDScriptDataType &p_type) { // Returns the index of a temporary slot, reusing a free pooled slot of the matching type when there is one.
+	Variant::Type temp_type = Variant::NIL; // Pool key; stays NIL when p_type carries no type.
+	if (p_type.has_type) {
+		if (p_type.kind == GDScriptDataType::BUILTIN) {
+			switch (p_type.builtin_type) {
+				case Variant::NIL:
+				case Variant::BOOL:
+				case Variant::INT:
+				case Variant::FLOAT:
+				case Variant::STRING:
+				case Variant::VECTOR2:
+				case Variant::VECTOR2I:
+				case Variant::RECT2:
+				case Variant::RECT2I:
+				case Variant::VECTOR3:
+				case Variant::VECTOR3I:
+				case Variant::TRANSFORM2D:
+				case Variant::PLANE:
+				case Variant::QUAT:
+				case Variant::AABB:
+				case Variant::BASIS:
+				case Variant::TRANSFORM:
+				case Variant::COLOR:
+				case Variant::STRING_NAME:
+				case Variant::NODE_PATH:
+				case Variant::RID:
+				case Variant::OBJECT:
+				case Variant::CALLABLE:
+				case Variant::SIGNAL:
+				case Variant::DICTIONARY:
+				case Variant::ARRAY:
+					temp_type = p_type.builtin_type;
+					break;
+				case Variant::PACKED_BYTE_ARRAY:
+				case Variant::PACKED_INT32_ARRAY:
+				case Variant::PACKED_INT64_ARRAY:
+				case Variant::PACKED_FLOAT32_ARRAY:
+				case Variant::PACKED_FLOAT64_ARRAY:
+				case Variant::PACKED_STRING_ARRAY:
+				case Variant::PACKED_VECTOR2_ARRAY:
+				case Variant::PACKED_VECTOR3_ARRAY:
+				case Variant::PACKED_COLOR_ARRAY:
+				case Variant::VARIANT_MAX:
+					// Packed arrays are reference counted, so they get no typed pool; they (and VARIANT_MAX) use the NIL pool.
+					temp_type = Variant::NIL;
+					break;
+			}
+		} else {
+			temp_type = Variant::OBJECT; // Any typed non-builtin kind is pooled as OBJECT.
+		}
+	}
+
+	if (!temporaries_pool.has(temp_type)) {
+		temporaries_pool[temp_type] = List<int>();
+	}
+
+	List<int> &pool = temporaries_pool[temp_type];
+	if (pool.is_empty()) { // No free slot of this type: create one.
+		StackSlot new_temp(temp_type);
+		int idx = temporaries.size();
+		pool.push_back(idx);
+		temporaries.push_back(new_temp);
+
+		// First time using this slot, so emit a type-adjust opcode to give it the pool's type.
+		if (temp_type != Variant::NIL) {
+			Address addr(Address::TEMPORARY, idx, p_type);
+			write_type_adjust(addr, temp_type);
+		}
+	}
+	int slot = pool.front()->get(); // Take a free slot out of the pool...
+	pool.pop_front();
+	used_temporaries.push_back(slot); // ...and mark it in use until pop_temporary() releases it.
+	return slot;
 }
 
 void GDScriptByteCodeGenerator::pop_temporary() {
-	ERR_FAIL_COND(current_temporaries == 0);
-	current_stack_size--;
-#ifdef DEBUG_ENABLED
-	if (temp_stack.back()->get() != current_stack_size) {
-		ERR_PRINT("Mismatched popping of temporary value");
-	}
-	temp_stack.pop_back();
-#endif
-	current_temporaries--;
+	ERR_FAIL_COND(used_temporaries.is_empty()); // Nothing to release.
+	int slot_idx = used_temporaries.back()->get(); // The most recently allocated temporary is released first.
+	const StackSlot &slot = temporaries[slot_idx];
+	temporaries_pool[slot.type].push_back(slot_idx); // Return the slot to the pool of its type so add_temporary() can reuse it.
+	used_temporaries.pop_back();
 }
 
 void GDScriptByteCodeGenerator::start_parameters() {
@@ -119,12 +182,18 @@ void GDScriptByteCodeGenerator::write_start(GDScript *p_script, const StringName
 
 GDScriptFunction *GDScriptByteCodeGenerator::write_end() {
 #ifdef DEBUG_ENABLED
-	if (current_temporaries != 0) {
-		ERR_PRINT("Non-zero temporary variables at end of function: " + itos(current_temporaries));
+	if (!used_temporaries.is_empty()) {
+		ERR_PRINT("Non-zero temporary variables at end of function: " + itos(used_temporaries.size()));
 	}
 #endif
 	append(GDScriptFunction::OPCODE_END, 0);
 
+	for (int i = 0; i < temporaries.size(); i++) {
+		for (int j = 0; j < temporaries[i].bytecode_indices.size(); j++) {
+			opcodes.write[temporaries[i].bytecode_indices[j]] = (i + max_locals + RESERVED_STACK) | (GDScriptFunction::ADDR_TYPE_STACK << GDScriptFunction::ADDR_BITS);
+		}
+	}
+
 	if (constant_map.size()) {
 		function->_constant_count = constant_map.size();
 		function->constants.resize(constant_map.size());
@@ -317,7 +386,7 @@ GDScriptFunction *GDScriptByteCodeGenerator::write_end() {
 	if (debug_stack) {
 		function->stack_debug = stack_debug;
 	}
-	function->_stack_size = stack_max;
+	function->_stack_size = RESERVED_STACK + max_locals + temporaries.size();
 	function->_instruction_args_size = instr_args_max;
 	function->_ptrcall_args_size = ptrcall_max;
 
@@ -341,6 +410,117 @@ void GDScriptByteCodeGenerator::set_initial_line(int p_line) {
 #define IS_BUILTIN_TYPE(m_var, m_type) \
 	(m_var.type.has_type && m_var.type.kind == GDScriptDataType::BUILTIN && m_var.type.builtin_type == m_type)
 
+void GDScriptByteCodeGenerator::write_type_adjust(const Address &p_target, Variant::Type p_new_type) { // Emits the TYPE_ADJUST opcode matching p_new_type, followed by p_target; emits nothing for NIL or VARIANT_MAX.
+	switch (p_new_type) {
+		case Variant::BOOL:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_BOOL, 1);
+			break;
+		case Variant::INT:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_INT, 1);
+			break;
+		case Variant::FLOAT:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_FLOAT, 1);
+			break;
+		case Variant::STRING:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_STRING, 1);
+			break;
+		case Variant::VECTOR2:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_VECTOR2, 1);
+			break;
+		case Variant::VECTOR2I:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_VECTOR2I, 1);
+			break;
+		case Variant::RECT2:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_RECT2, 1);
+			break;
+		case Variant::RECT2I:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_RECT2I, 1);
+			break;
+		case Variant::VECTOR3:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_VECTOR3, 1);
+			break;
+		case Variant::VECTOR3I:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_VECTOR3I, 1);
+			break;
+		case Variant::TRANSFORM2D:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_TRANSFORM2D, 1);
+			break;
+		case Variant::PLANE:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PLANE, 1);
+			break;
+		case Variant::QUAT:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_QUAT, 1);
+			break;
+		case Variant::AABB:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_AABB, 1);
+			break;
+		case Variant::BASIS:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_BASIS, 1);
+			break;
+		case Variant::TRANSFORM:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_TRANSFORM, 1);
+			break;
+		case Variant::COLOR:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_COLOR, 1);
+			break;
+		case Variant::STRING_NAME:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_STRING_NAME, 1);
+			break;
+		case Variant::NODE_PATH:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_NODE_PATH, 1);
+			break;
+		case Variant::RID:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_RID, 1);
+			break;
+		case Variant::OBJECT:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_OBJECT, 1);
+			break;
+		case Variant::CALLABLE:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_CALLABLE, 1);
+			break;
+		case Variant::SIGNAL:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_SIGNAL, 1);
+			break;
+		case Variant::DICTIONARY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_DICTIONARY, 1);
+			break;
+		case Variant::ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_ARRAY, 1);
+			break;
+		case Variant::PACKED_BYTE_ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PACKED_BYTE_ARRAY, 1);
+			break;
+		case Variant::PACKED_INT32_ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PACKED_INT32_ARRAY, 1);
+			break;
+		case Variant::PACKED_INT64_ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PACKED_INT64_ARRAY, 1);
+			break;
+		case Variant::PACKED_FLOAT32_ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PACKED_FLOAT32_ARRAY, 1);
+			break;
+		case Variant::PACKED_FLOAT64_ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PACKED_FLOAT64_ARRAY, 1);
+			break;
+		case Variant::PACKED_STRING_ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PACKED_STRING_ARRAY, 1);
+			break;
+		case Variant::PACKED_VECTOR2_ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PACKED_VECTOR2_ARRAY, 1);
+			break;
+		case Variant::PACKED_VECTOR3_ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PACKED_VECTOR3_ARRAY, 1);
+			break;
+		case Variant::PACKED_COLOR_ARRAY:
+			append(GDScriptFunction::OPCODE_TYPE_ADJUST_PACKED_COLOR_ARRAY, 1);
+			break;
+		case Variant::NIL:
+		case Variant::VARIANT_MAX:
+			return; // No opcode is written for these, and the target address is not appended either.
+	}
+	append(p_target); // The opcode's single argument: the address to adjust.
+}
+
 void GDScriptByteCodeGenerator::write_unary_operator(const Address &p_target, Variant::Operator p_operator, const Address &p_left_operand) {
 	if (HAS_BUILTIN_TYPE(p_left_operand)) {
 		// Gather specific operator.
@@ -391,7 +571,7 @@ void GDScriptByteCodeGenerator::write_type_test(const Address &p_target, const A
 }
 
 void GDScriptByteCodeGenerator::write_type_test_builtin(const Address &p_target, const Address &p_source, Variant::Type p_type) {
-	append(GDScriptFunction::OPCODE_IS_BUILTIN, 3);
+	append(GDScriptFunction::OPCODE_IS_BUILTIN, 2);
 	append(p_source);
 	append(p_target);
 	append(p_type);
@@ -806,6 +986,14 @@ void GDScriptByteCodeGenerator::write_call_builtin_type(const Address &p_target,
 		return;
 	}
 
+	if (p_target.mode == Address::TEMPORARY) {
+		Variant::Type result_type = Variant::get_builtin_method_return_type(p_type, p_method);
+		Variant::Type temp_type = temporaries[p_target.address].type;
+		if (result_type != temp_type) {
+			write_type_adjust(p_target, result_type);
+		}
+	}
+
 	append(GDScriptFunction::OPCODE_CALL_BUILTIN_TYPE_VALIDATED, 2 + p_arguments.size());
 
 	for (int i = 0; i < p_arguments.size(); i++) {
diff --git a/modules/gdscript/gdscript_byte_codegen.h b/modules/gdscript/gdscript_byte_codegen.h
index 4b196ed420..17d681d7bb 100644
--- a/modules/gdscript/gdscript_byte_codegen.h
+++ b/modules/gdscript/gdscript_byte_codegen.h
@@ -37,6 +37,17 @@
 #include "gdscript_utility_functions.h"
 
 class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
+	struct StackSlot { // A stack position tracked by the generator; used for both locals and temporaries.
+		Variant::Type type = Variant::NIL; // Type associated with the slot; temporaries are pooled by it.
+		Vector<int> bytecode_indices; // For temporaries: opcode positions that refer to this slot, patched with the final address in write_end().
+
+		StackSlot() = default;
+		StackSlot(Variant::Type p_type) :
+				type(p_type) {}
+	};
+
+	const static int RESERVED_STACK = 3; // Number of slots at the start of the stack reserved for self, class, and nil.
+
 	bool ended = false;
 	GDScriptFunction *function = nullptr;
 	bool debug_stack = false;
@@ -47,15 +58,17 @@ class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
 	List<int> stack_identifiers_counts;
 	Map<StringName, int> local_constants;
 
+	Vector<StackSlot> locals; // Locals currently in scope; truncated when a block's identifiers are popped.
+	Vector<StackSlot> temporaries; // Every temporary slot created for the function; its size is part of the final stack size.
+	List<int> used_temporaries; // Indices into 'temporaries' that are currently allocated, most recent last.
+	Map<Variant::Type, List<int>> temporaries_pool; // Free temporary indices, grouped by slot type, available for reuse.
+
 	List<GDScriptFunction::StackDebug> stack_debug;
 	List<Map<StringName, int>> block_identifier_stack;
 	Map<StringName, int> block_identifiers;
 
-	int current_stack_size = 3; // First 3 spots are reserved for self, class, and nil.
-	int current_temporaries = 0;
-	int current_locals = 0;
+	int max_locals = 0;
 	int current_line = 0;
-	int stack_max = 3;
 	int instr_args_max = 0;
 	int ptrcall_max = 0;
 
@@ -102,7 +115,9 @@ class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
 	List<List<int>> match_continues_to_patch;
 
 	void add_stack_identifier(const StringName &p_id, int p_stackpos) {
-		current_locals++;
+		if (locals.size() > max_locals) {
+			max_locals = locals.size();
+		}
 		stack_identifiers[p_id] = p_stackpos;
 		if (debug_stack) {
 			block_identifiers[p_id] = p_stackpos;
@@ -116,7 +131,7 @@ class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
 	}
 
 	void push_stack_identifiers() {
-		stack_identifiers_counts.push_back(current_locals);
+		stack_identifiers_counts.push_back(locals.size());
 		stack_id_stack.push_back(stack_identifiers);
 		if (debug_stack) {
 			Map<StringName, int> block_ids(block_identifiers);
@@ -126,17 +141,16 @@ class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
 	}
 
 	void pop_stack_identifiers() {
-		current_locals = stack_identifiers_counts.back()->get();
+		int current_locals = stack_identifiers_counts.back()->get();
 		stack_identifiers_counts.pop_back();
 		stack_identifiers = stack_id_stack.back()->get();
 		stack_id_stack.pop_back();
 #ifdef DEBUG_ENABLED
-		if (current_temporaries != 0) {
-			ERR_PRINT("Leaving block with non-zero temporary variables: " + itos(current_temporaries));
+		if (!used_temporaries.is_empty()) {
+			ERR_PRINT("Leaving block with non-zero temporary variables: " + itos(used_temporaries.size()));
 		}
 #endif
-		current_stack_size = current_locals + 3; // Keep the 3 reserved slots for self, class, and nil.
-
+		locals.resize(current_locals);
 		if (debug_stack) {
 			for (Map<StringName, int>::Element *E = block_identifiers.front(); E; E = E->next()) {
 				GDScriptFunction::StackDebug sd;
@@ -279,18 +293,6 @@ class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
 		return pos;
 	}
 
-	void alloc_stack(int p_level) {
-		if (p_level >= stack_max) {
-			stack_max = p_level + 1;
-		}
-	}
-
-	int increase_stack() {
-		int top = current_stack_size++;
-		alloc_stack(current_stack_size);
-		return top;
-	}
-
 	void alloc_ptrcall(int p_params) {
 		if (p_params >= ptrcall_max) {
 			ptrcall_max = p_params;
@@ -308,9 +310,11 @@ class GDScriptByteCodeGenerator : public GDScriptCodeGenerator {
 			case Address::CONSTANT:
 				return p_address.address | (GDScriptFunction::ADDR_TYPE_CONSTANT << GDScriptFunction::ADDR_BITS);
 			case Address::LOCAL_VARIABLE:
-			case Address::TEMPORARY:
 			case Address::FUNCTION_PARAMETER:
 				return p_address.address | (GDScriptFunction::ADDR_TYPE_STACK << GDScriptFunction::ADDR_BITS);
+			case Address::TEMPORARY:
+				temporaries.write[p_address.address].bytecode_indices.push_back(opcodes.size());
+				return -1;
 			case Address::NIL:
 				return GDScriptFunction::ADDR_NIL;
 		}
@@ -392,7 +396,7 @@ public:
 	virtual uint32_t add_local_constant(const StringName &p_name, const Variant &p_constant) override;
 	virtual uint32_t add_or_get_constant(const Variant &p_constant) override;
 	virtual uint32_t add_or_get_name(const StringName &p_name) override;
-	virtual uint32_t add_temporary() override;
+	virtual uint32_t add_temporary(const GDScriptDataType &p_type) override;
 	virtual void pop_temporary() override;
 
 	virtual void start_parameters() override;
@@ -409,6 +413,7 @@ public:
 #endif
 	virtual void set_initial_line(int p_line) override;
 
+	virtual void write_type_adjust(const Address &p_target, Variant::Type p_new_type) override;
 	virtual void write_unary_operator(const Address &p_target, Variant::Operator p_operator, const Address &p_left_operand) override;
 	virtual void write_binary_operator(const Address &p_target, Variant::Operator p_operator, const Address &p_left_operand, const Address &p_right_operand) override;
 	virtual void write_type_test(const Address &p_target, const Address &p_source, const Address &p_type) override;
diff --git a/modules/gdscript/gdscript_codegen.h b/modules/gdscript/gdscript_codegen.h
index cce4e856c7..b377beefdb 100644
--- a/modules/gdscript/gdscript_codegen.h
+++ b/modules/gdscript/gdscript_codegen.h
@@ -71,7 +71,7 @@ public:
 	virtual uint32_t add_local_constant(const StringName &p_name, const Variant &p_constant) = 0;
 	virtual uint32_t add_or_get_constant(const Variant &p_constant) = 0;
 	virtual uint32_t add_or_get_name(const StringName &p_name) = 0;
-	virtual uint32_t add_temporary() = 0;
+	virtual uint32_t add_temporary(const GDScriptDataType &p_type) = 0;
 	virtual void pop_temporary() = 0;
 
 	virtual void start_parameters() = 0;
@@ -80,9 +80,6 @@ public:
 	virtual void start_block() = 0;
 	virtual void end_block() = 0;
 
-	// virtual int get_max_stack_level() = 0;
-	// virtual int get_max_function_arguments() = 0;
-
 	virtual void write_start(GDScript *p_script, const StringName &p_function_name, bool p_static, MultiplayerAPI::RPCMode p_rpc_mode, const GDScriptDataType &p_return_type) = 0;
 	virtual GDScriptFunction *write_end() = 0;
 
@@ -91,9 +88,7 @@ public:
 #endif
 	virtual void set_initial_line(int p_line) = 0;
 
-	// virtual void alloc_stack(int p_level) = 0; // Is this needed?
-	// virtual void alloc_call(int p_arg_count) = 0; // This might be automatic from other functions.
-
+	virtual void write_type_adjust(const Address &p_target, Variant::Type p_new_type) = 0;
 	virtual void write_unary_operator(const Address &p_target, Variant::Operator p_operator, const Address &p_left_operand) = 0;
 	virtual void write_binary_operator(const Address &p_target, Variant::Operator p_operator, const Address &p_left_operand, const Address &p_right_operand) = 0;
 	virtual void write_type_test(const Address &p_target, const Address &p_source, const Address &p_type) = 0;
@@ -138,7 +133,6 @@ public:
 	virtual void write_construct_dictionary(const Address &p_target, const Vector<Address> &p_arguments) = 0;
 	virtual void write_await(const Address &p_target, const Address &p_operand) = 0;
 	virtual void write_if(const Address &p_condition) = 0;
-	// virtual void write_elseif(const Address &p_condition) = 0; This kind of makes things more difficult for no real benefit.
 	virtual void write_else() = 0;
 	virtual void write_endif() = 0;
 	virtual void start_for(const GDScriptDataType &p_iterator_type, const GDScriptDataType &p_list_type) = 0;
diff --git a/modules/gdscript/gdscript_compiler.cpp b/modules/gdscript/gdscript_compiler.cpp
index abbca899bd..7429e3cc0b 100644
--- a/modules/gdscript/gdscript_compiler.cpp
+++ b/modules/gdscript/gdscript_compiler.cpp
@@ -711,7 +711,7 @@ GDScriptCodeGenerator::Address GDScriptCompiler::_parse_expression(CodeGen &code
 		case GDScriptParser::Node::UNARY_OPERATOR: {
 			const GDScriptParser::UnaryOpNode *unary = static_cast<const GDScriptParser::UnaryOpNode *>(p_expression);
 
-			GDScriptCodeGenerator::Address result = codegen.add_temporary();
+			GDScriptCodeGenerator::Address result = codegen.add_temporary(_gdtype_from_datatype(unary->get_datatype()));
 
 			GDScriptCodeGenerator::Address operand = _parse_expression(codegen, r_error, unary->operand);
 			if (r_error) {
@@ -729,7 +729,7 @@ GDScriptCodeGenerator::Address GDScriptCompiler::_parse_expression(CodeGen &code
 		case GDScriptParser::Node::BINARY_OPERATOR: {
 			const GDScriptParser::BinaryOpNode *binary = static_cast<const GDScriptParser::BinaryOpNode *>(p_expression);
 
-			GDScriptCodeGenerator::Address result = codegen.add_temporary();
+			GDScriptCodeGenerator::Address result = codegen.add_temporary(_gdtype_from_datatype(binary->get_datatype()));
 
 			switch (binary->operation) {
 				case GDScriptParser::BinaryOpNode::OP_LOGIC_AND: {
diff --git a/modules/gdscript/gdscript_compiler.h b/modules/gdscript/gdscript_compiler.h
index 1b0beec0d4..c405eadb07 100644
--- a/modules/gdscript/gdscript_compiler.h
+++ b/modules/gdscript/gdscript_compiler.h
@@ -66,7 +66,7 @@ class GDScriptCompiler {
 		}
 
 		GDScriptCodeGenerator::Address add_temporary(const GDScriptDataType &p_type = GDScriptDataType()) {
-			uint32_t addr = generator->add_temporary();
+			uint32_t addr = generator->add_temporary(p_type); // Forward the type so the generator can choose the temporary slot by type.
 			return GDScriptCodeGenerator::Address(GDScriptCodeGenerator::Address::TEMPORARY, addr, p_type);
 		}
 
diff --git a/modules/gdscript/gdscript_disassembler.cpp b/modules/gdscript/gdscript_disassembler.cpp
index 74da0ee232..0d0afcc741 100644
--- a/modules/gdscript/gdscript_disassembler.cpp
+++ b/modules/gdscript/gdscript_disassembler.cpp
@@ -894,6 +894,51 @@ void GDScriptFunction::disassemble(const Vector<String> &p_code_lines) const {
 
 				incr += 2;
 			} break;
+
+#define DISASSEMBLE_TYPE_ADJUST(m_v_type) \
+	case OPCODE_TYPE_ADJUST_##m_v_type: { \
+		text += "type adjust (";          \
+		text += #m_v_type;                \
+		text += ") ";                     \
+		text += DADDR(1);                 \
+		incr += 2;                        \
+	} break
+				// One case per OPCODE_TYPE_ADJUST_* opcode: prints the type name and the address argument, then advances by 2.
+				DISASSEMBLE_TYPE_ADJUST(BOOL);
+				DISASSEMBLE_TYPE_ADJUST(INT);
+				DISASSEMBLE_TYPE_ADJUST(FLOAT);
+				DISASSEMBLE_TYPE_ADJUST(STRING);
+				DISASSEMBLE_TYPE_ADJUST(VECTOR2);
+				DISASSEMBLE_TYPE_ADJUST(VECTOR2I);
+				DISASSEMBLE_TYPE_ADJUST(RECT2);
+				DISASSEMBLE_TYPE_ADJUST(RECT2I);
+				DISASSEMBLE_TYPE_ADJUST(VECTOR3);
+				DISASSEMBLE_TYPE_ADJUST(VECTOR3I);
+				DISASSEMBLE_TYPE_ADJUST(TRANSFORM2D);
+				DISASSEMBLE_TYPE_ADJUST(PLANE);
+				DISASSEMBLE_TYPE_ADJUST(QUAT);
+				DISASSEMBLE_TYPE_ADJUST(AABB);
+				DISASSEMBLE_TYPE_ADJUST(BASIS);
+				DISASSEMBLE_TYPE_ADJUST(TRANSFORM);
+				DISASSEMBLE_TYPE_ADJUST(COLOR);
+				DISASSEMBLE_TYPE_ADJUST(STRING_NAME);
+				DISASSEMBLE_TYPE_ADJUST(NODE_PATH);
+				DISASSEMBLE_TYPE_ADJUST(RID);
+				DISASSEMBLE_TYPE_ADJUST(OBJECT);
+				DISASSEMBLE_TYPE_ADJUST(CALLABLE);
+				DISASSEMBLE_TYPE_ADJUST(SIGNAL);
+				DISASSEMBLE_TYPE_ADJUST(DICTIONARY);
+				DISASSEMBLE_TYPE_ADJUST(ARRAY);
+				DISASSEMBLE_TYPE_ADJUST(PACKED_BYTE_ARRAY);
+				DISASSEMBLE_TYPE_ADJUST(PACKED_INT32_ARRAY);
+				DISASSEMBLE_TYPE_ADJUST(PACKED_INT64_ARRAY);
+				DISASSEMBLE_TYPE_ADJUST(PACKED_FLOAT32_ARRAY);
+				DISASSEMBLE_TYPE_ADJUST(PACKED_FLOAT64_ARRAY);
+				DISASSEMBLE_TYPE_ADJUST(PACKED_STRING_ARRAY);
+				DISASSEMBLE_TYPE_ADJUST(PACKED_VECTOR2_ARRAY);
+				DISASSEMBLE_TYPE_ADJUST(PACKED_VECTOR3_ARRAY);
+				DISASSEMBLE_TYPE_ADJUST(PACKED_COLOR_ARRAY);
+
 			case OPCODE_ASSERT: {
 				text += "assert (";
 				text += DADDR(1);
diff --git a/modules/gdscript/gdscript_function.h b/modules/gdscript/gdscript_function.h
index 414dfab2e7..fbec734a28 100644
--- a/modules/gdscript/gdscript_function.h
+++ b/modules/gdscript/gdscript_function.h
@@ -351,6 +351,40 @@ public:
 		OPCODE_ITERATE_PACKED_COLOR_ARRAY,
 		OPCODE_ITERATE_OBJECT,
 		OPCODE_STORE_NAMED_GLOBAL,
+		OPCODE_TYPE_ADJUST_BOOL,
+		OPCODE_TYPE_ADJUST_INT,
+		OPCODE_TYPE_ADJUST_FLOAT,
+		OPCODE_TYPE_ADJUST_STRING,
+		OPCODE_TYPE_ADJUST_VECTOR2,
+		OPCODE_TYPE_ADJUST_VECTOR2I,
+		OPCODE_TYPE_ADJUST_RECT2,
+		OPCODE_TYPE_ADJUST_RECT2I,
+		OPCODE_TYPE_ADJUST_VECTOR3,
+		OPCODE_TYPE_ADJUST_VECTOR3I,
+		OPCODE_TYPE_ADJUST_TRANSFORM2D,
+		OPCODE_TYPE_ADJUST_PLANE,
+		OPCODE_TYPE_ADJUST_QUAT,
+		OPCODE_TYPE_ADJUST_AABB,
+		OPCODE_TYPE_ADJUST_BASIS,
+		OPCODE_TYPE_ADJUST_TRANSFORM,
+		OPCODE_TYPE_ADJUST_COLOR,
+		OPCODE_TYPE_ADJUST_STRING_NAME,
+		OPCODE_TYPE_ADJUST_NODE_PATH,
+		OPCODE_TYPE_ADJUST_RID,
+		OPCODE_TYPE_ADJUST_OBJECT,
+		OPCODE_TYPE_ADJUST_CALLABLE,
+		OPCODE_TYPE_ADJUST_SIGNAL,
+		OPCODE_TYPE_ADJUST_DICTIONARY,
+		OPCODE_TYPE_ADJUST_ARRAY,
+		OPCODE_TYPE_ADJUST_PACKED_BYTE_ARRAY,
+		OPCODE_TYPE_ADJUST_PACKED_INT32_ARRAY,
+		OPCODE_TYPE_ADJUST_PACKED_INT64_ARRAY,
+		OPCODE_TYPE_ADJUST_PACKED_FLOAT32_ARRAY,
+		OPCODE_TYPE_ADJUST_PACKED_FLOAT64_ARRAY,
+		OPCODE_TYPE_ADJUST_PACKED_STRING_ARRAY,
+		OPCODE_TYPE_ADJUST_PACKED_VECTOR2_ARRAY,
+		OPCODE_TYPE_ADJUST_PACKED_VECTOR3_ARRAY,
+		OPCODE_TYPE_ADJUST_PACKED_COLOR_ARRAY,
 		OPCODE_ASSERT,
 		OPCODE_BREAKPOINT,
 		OPCODE_LINE,
diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp
index 695154e9a9..ca8bb8fcae 100644
--- a/modules/gdscript/gdscript_parser.cpp
+++ b/modules/gdscript/gdscript_parser.cpp
@@ -811,6 +811,7 @@ GDScriptParser::VariableNode *GDScriptParser::parse_variable(bool p_allow_proper
 
 	VariableNode *variable = alloc_node<VariableNode>();
 	variable->identifier = parse_identifier();
+	variable->export_info.name = variable->identifier->name;
 
 	if (match(GDScriptTokenizer::Token::COLON)) {
 		if (check(GDScriptTokenizer::Token::NEWLINE)) {
@@ -860,8 +861,6 @@ GDScriptParser::VariableNode *GDScriptParser::parse_variable(bool p_allow_proper
 
 	end_statement("variable declaration");
 
-	variable->export_info.name = variable->identifier->name;
-
 	return variable;
 }
 
diff --git a/modules/gdscript/gdscript_vm.cpp b/modules/gdscript/gdscript_vm.cpp
index 8bf6a8b08b..b47a4eb992 100644
--- a/modules/gdscript/gdscript_vm.cpp
+++ b/modules/gdscript/gdscript_vm.cpp
@@ -282,6 +282,40 @@ String GDScriptFunction::_get_call_error(const Callable::CallError &p_err, const
 		&&OPCODE_ITERATE_PACKED_COLOR_ARRAY,         \
 		&&OPCODE_ITERATE_OBJECT,                     \
 		&&OPCODE_STORE_NAMED_GLOBAL,                 \
+		&&OPCODE_TYPE_ADJUST_BOOL,                   \
+		&&OPCODE_TYPE_ADJUST_INT,                    \
+		&&OPCODE_TYPE_ADJUST_FLOAT,                  \
+		&&OPCODE_TYPE_ADJUST_STRING,                 \
+		&&OPCODE_TYPE_ADJUST_VECTOR2,                \
+		&&OPCODE_TYPE_ADJUST_VECTOR2I,               \
+		&&OPCODE_TYPE_ADJUST_RECT2,                  \
+		&&OPCODE_TYPE_ADJUST_RECT2I,                 \
+		&&OPCODE_TYPE_ADJUST_VECTOR3,                \
+		&&OPCODE_TYPE_ADJUST_VECTOR3I,               \
+		&&OPCODE_TYPE_ADJUST_TRANSFORM2D,            \
+		&&OPCODE_TYPE_ADJUST_PLANE,                  \
+		&&OPCODE_TYPE_ADJUST_QUAT,                   \
+		&&OPCODE_TYPE_ADJUST_AABB,                   \
+		&&OPCODE_TYPE_ADJUST_BASIS,                  \
+		&&OPCODE_TYPE_ADJUST_TRANSFORM,              \
+		&&OPCODE_TYPE_ADJUST_COLOR,                  \
+		&&OPCODE_TYPE_ADJUST_STRING_NAME,            \
+		&&OPCODE_TYPE_ADJUST_NODE_PATH,              \
+		&&OPCODE_TYPE_ADJUST_RID,                    \
+		&&OPCODE_TYPE_ADJUST_OBJECT,                 \
+		&&OPCODE_TYPE_ADJUST_CALLABLE,               \
+		&&OPCODE_TYPE_ADJUST_SIGNAL,                 \
+		&&OPCODE_TYPE_ADJUST_DICTIONARY,             \
+		&&OPCODE_TYPE_ADJUST_ARRAY,                  \
+		&&OPCODE_TYPE_ADJUST_PACKED_BYTE_ARRAY,      \
+		&&OPCODE_TYPE_ADJUST_PACKED_INT32_ARRAY,     \
+		&&OPCODE_TYPE_ADJUST_PACKED_INT64_ARRAY,     \
+		&&OPCODE_TYPE_ADJUST_PACKED_FLOAT32_ARRAY,   \
+		&&OPCODE_TYPE_ADJUST_PACKED_FLOAT64_ARRAY,   \
+		&&OPCODE_TYPE_ADJUST_PACKED_STRING_ARRAY,    \
+		&&OPCODE_TYPE_ADJUST_PACKED_VECTOR2_ARRAY,   \
+		&&OPCODE_TYPE_ADJUST_PACKED_VECTOR3_ARRAY,   \
+		&&OPCODE_TYPE_ADJUST_PACKED_COLOR_ARRAY,     \
 		&&OPCODE_ASSERT,                             \
 		&&OPCODE_BREAKPOINT,                         \
 		&&OPCODE_LINE,                               \
@@ -2973,6 +3007,50 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 			}
 			DISPATCH_OPCODE;
 
+#define OPCODE_TYPE_ADJUST(m_v_type, m_c_type)    \
+	OPCODE(OPCODE_TYPE_ADJUST_##m_v_type) {       \
+		CHECK_SPACE(2);                           \
+		GET_INSTRUCTION_ARG(arg, 0);              \
+		VariantTypeAdjust<m_c_type>::adjust(arg); \
+		ip += 2;                                  \
+	}                                             \
+	DISPATCH_OPCODE
+			// One handler per type: calls VariantTypeAdjust<T>::adjust() on the instruction's single argument, then advances ip by 2.
+			OPCODE_TYPE_ADJUST(BOOL, bool);
+			OPCODE_TYPE_ADJUST(INT, int64_t);
+			OPCODE_TYPE_ADJUST(FLOAT, double);
+			OPCODE_TYPE_ADJUST(STRING, String);
+			OPCODE_TYPE_ADJUST(VECTOR2, Vector2);
+			OPCODE_TYPE_ADJUST(VECTOR2I, Vector2i);
+			OPCODE_TYPE_ADJUST(RECT2, Rect2);
+			OPCODE_TYPE_ADJUST(RECT2I, Rect2i);
+			OPCODE_TYPE_ADJUST(VECTOR3, Vector3);
+			OPCODE_TYPE_ADJUST(VECTOR3I, Vector3i);
+			OPCODE_TYPE_ADJUST(TRANSFORM2D, Transform2D);
+			OPCODE_TYPE_ADJUST(PLANE, Plane);
+			OPCODE_TYPE_ADJUST(QUAT, Quat);
+			OPCODE_TYPE_ADJUST(AABB, AABB);
+			OPCODE_TYPE_ADJUST(BASIS, Basis);
+			OPCODE_TYPE_ADJUST(TRANSFORM, Transform);
+			OPCODE_TYPE_ADJUST(COLOR, Color);
+			OPCODE_TYPE_ADJUST(STRING_NAME, StringName);
+			OPCODE_TYPE_ADJUST(NODE_PATH, NodePath);
+			OPCODE_TYPE_ADJUST(RID, RID);
+			OPCODE_TYPE_ADJUST(OBJECT, Object *);
+			OPCODE_TYPE_ADJUST(CALLABLE, Callable);
+			OPCODE_TYPE_ADJUST(SIGNAL, Signal);
+			OPCODE_TYPE_ADJUST(DICTIONARY, Dictionary);
+			OPCODE_TYPE_ADJUST(ARRAY, Array);
+			OPCODE_TYPE_ADJUST(PACKED_BYTE_ARRAY, PackedByteArray);
+			OPCODE_TYPE_ADJUST(PACKED_INT32_ARRAY, PackedInt32Array);
+			OPCODE_TYPE_ADJUST(PACKED_INT64_ARRAY, PackedInt64Array);
+			OPCODE_TYPE_ADJUST(PACKED_FLOAT32_ARRAY, PackedFloat32Array);
+			OPCODE_TYPE_ADJUST(PACKED_FLOAT64_ARRAY, PackedFloat64Array);
+			OPCODE_TYPE_ADJUST(PACKED_STRING_ARRAY, PackedStringArray);
+			OPCODE_TYPE_ADJUST(PACKED_VECTOR2_ARRAY, PackedVector2Array);
+			OPCODE_TYPE_ADJUST(PACKED_VECTOR3_ARRAY, PackedVector3Array);
+			OPCODE_TYPE_ADJUST(PACKED_COLOR_ARRAY, PackedColorArray);
+
 			OPCODE(OPCODE_ASSERT) {
 				CHECK_SPACE(3);
 
diff --git a/modules/gdscript/register_types.cpp b/modules/gdscript/register_types.cpp
index 19fd3daf20..2d2f94f5e0 100644
--- a/modules/gdscript/register_types.cpp
+++ b/modules/gdscript/register_types.cpp
@@ -163,19 +163,19 @@ void unregister_gdscript_types() {
 
 #ifdef TESTS_ENABLED
 void test_tokenizer() {
-	TestGDScript::test(TestGDScript::TestType::TEST_TOKENIZER);
+	GDScriptTests::test(GDScriptTests::TestType::TEST_TOKENIZER);
 }
 
 void test_parser() {
-	TestGDScript::test(TestGDScript::TestType::TEST_PARSER);
+	GDScriptTests::test(GDScriptTests::TestType::TEST_PARSER);
 }
 
 void test_compiler() {
-	TestGDScript::test(TestGDScript::TestType::TEST_COMPILER);
+	GDScriptTests::test(GDScriptTests::TestType::TEST_COMPILER);
 }
 
 void test_bytecode() {
-	TestGDScript::test(TestGDScript::TestType::TEST_BYTECODE);
+	GDScriptTests::test(GDScriptTests::TestType::TEST_BYTECODE);
 }
 
 REGISTER_TEST_COMMAND("gdscript-tokenizer", &test_tokenizer);
diff --git a/modules/gdscript/tests/gdscript_test_runner.cpp b/modules/gdscript/tests/gdscript_test_runner.cpp
new file mode 100644
index 0000000000..f53c3046e6
--- /dev/null
+++ b/modules/gdscript/tests/gdscript_test_runner.cpp
@@ -0,0 +1,584 @@
+/*************************************************************************/
+/*  gdscript_test_runner.cpp                                             */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "gdscript_test_runner.h"
+
+#include "../gdscript.h"
+#include "../gdscript_analyzer.h"
+#include "../gdscript_compiler.h"
+#include "../gdscript_parser.h"
+
+#include "core/config/project_settings.h"
+#include "core/core_string_names.h"
+#include "core/io/file_access_pack.h"
+#include "core/os/dir_access.h"
+#include "core/os/os.h"
+#include "core/string/string_builder.h"
+#include "scene/resources/packed_scene.h"
+
+#include "tests/test_macros.h"
+
+namespace GDScriptTests {
+
+// Registers the project's singleton autoloads as global constants in every
+// registered script language. Non-singleton autoloads are ignored.
+void init_autoloads() {
+	typedef Map<StringName, ProjectSettings::AutoloadInfo> AutoloadMap;
+	AutoloadMap autoload_map = ProjectSettings::get_singleton()->get_autoload_list();
+
+	// Pass 1: declare every singleton name with an empty value, so the names
+	// already exist when the scripts get loaded in the second pass.
+	for (AutoloadMap::Element *entry = autoload_map.front(); entry; entry = entry->next()) {
+		const ProjectSettings::AutoloadInfo &autoload = entry->get();
+		if (!autoload.is_singleton) {
+			continue;
+		}
+
+		for (int lang_idx = 0; lang_idx < ScriptServer::get_language_count(); lang_idx++) {
+			ScriptServer::get_language(lang_idx)->add_global_constant(autoload.name, Variant());
+		}
+	}
+
+	// Pass 2: instance each singleton and bind the node to its global constant.
+	for (AutoloadMap::Element *entry = autoload_map.front(); entry; entry = entry->next()) {
+		const ProjectSettings::AutoloadInfo &autoload = entry->get();
+		if (!autoload.is_singleton) {
+			// There is no scene tree here, so only singletons are relevant.
+			continue;
+		}
+
+		RES res = ResourceLoader::load(autoload.path);
+		ERR_CONTINUE_MSG(res.is_null(), "Can't autoload: " + autoload.path);
+
+		Node *n = nullptr;
+		if (res->is_class("PackedScene")) {
+			// Scene autoload: the instanced scene root is the singleton.
+			Ref<PackedScene> scene = res;
+			n = scene->instance();
+		} else if (res->is_class("Script")) {
+			// Script autoload: instance the native base class and attach the script to it.
+			Ref<Script> script_res = res;
+			StringName base_type = script_res->get_instance_base_type();
+			bool valid_type = ClassDB::is_parent_class(base_type, "Node");
+			ERR_CONTINUE_MSG(!valid_type, "Script does not inherit a Node: " + autoload.path);
+
+			Object *obj = ClassDB::instance(base_type);
+
+			ERR_CONTINUE_MSG(obj == nullptr,
+					"Cannot instance script for autoload, expected 'Node' inheritance, got: " +
+							String(base_type));
+
+			n = Object::cast_to<Node>(obj);
+			n->set_script(script_res);
+		}
+
+		ERR_CONTINUE_MSG(!n, "Path in autoload not a node or script: " + autoload.path);
+		n->set_name(autoload.name);
+
+		for (int lang_idx = 0; lang_idx < ScriptServer::get_language_count(); lang_idx++) {
+			ScriptServer::get_language(lang_idx)->add_global_constant(autoload.name, n);
+		}
+	}
+}
+
+// Prepares the GDScript language for the test routine, using `p_base_path`
+// as the project (and resource) base path.
+void init_language(const String &p_base_path) {
+	// The languages need the project settings to resolve global scripts;
+	// setting them up also defines the base resource path.
+	ProjectSettings *settings = ProjectSettings::get_singleton();
+	if (settings->setup(p_base_path, String(), true) != OK) {
+		// Not fatal: some scripts still work without the project settings.
+		print_line("Could not load project settings.");
+	}
+
+	GDScriptLanguage::get_singleton()->init();
+	init_autoloads();
+}
+
+// Counterpart of init_language(): finishes the GDScript language and then
+// clears the global class list kept by the ScriptServer.
+void finish_language() {
+	GDScriptLanguage *language = GDScriptLanguage::get_singleton();
+	language->finish();
+	ScriptServer::global_classes_clear();
+}
+
+// Name of the function called in each test script; assigned by the constructor.
+StringName GDScriptTestRunner::test_function_name;
+
+// Creates a runner for the scripts under `p_source_dir`. When `p_init_language`
+// is true the runner also initializes the GDScript language and enables every
+// GDScript warning, so warnings can be tested too.
+GDScriptTestRunner::GDScriptTestRunner(const String &p_source_dir, bool p_init_language) {
+	test_function_name = StaticCString::create("test");
+	do_init_languages = p_init_language;
+
+	// Always store the source directory with a trailing slash.
+	source_dir = p_source_dir.ends_with("/") ? p_source_dir : p_source_dir + "/";
+
+	if (do_init_languages) {
+		init_language(p_source_dir);
+
+		// Turn on the warning system and then each individual warning.
+		ProjectSettings *settings = ProjectSettings::get_singleton();
+		settings->set_setting("debug/gdscript/warnings/enable", true);
+		for (int code = 0; code < (int)GDScriptWarning::WARNING_MAX; code++) {
+			const String warning_name = GDScriptWarning::get_name_from_code((GDScriptWarning::Code)code).to_lower();
+			settings->set_setting("debug/gdscript/warnings/" + warning_name, true);
+		}
+	}
+
+	// Printing must be enabled for the results to be shown.
+	_print_line_enabled = true;
+	_print_error_enabled = true;
+}
+
+// Resets the shared test function name and, if this runner initialized the
+// language in its constructor, finishes it again.
+GDScriptTestRunner::~GDScriptTestRunner() {
+	test_function_name = StringName();
+
+	if (!do_init_languages) {
+		return;
+	}
+	finish_language();
+}
+
+// Collects the test scripts under the source directory, runs each of them and
+// checks the result with doctest.
+//
+// Returns the number of failed tests, or -1 when the tests could not be
+// collected or the class index could not be generated.
+int GDScriptTestRunner::run_tests() {
+	if (!make_tests()) {
+		FAIL("An error occurred while making the tests.");
+		return -1;
+	}
+
+	if (!generate_class_index()) {
+		FAIL("An error occurred while generating class index.");
+		return -1;
+	}
+
+	int failed = 0;
+	for (int i = 0; i < tests.size(); i++) {
+		GDScriptTest test = tests[i];
+		GDScriptTest::TestResult result = test.run_test();
+
+		String expected = FileAccess::get_file_as_string(test.get_output_file());
+		// doctest INFO() messages only live until the end of the scope they are
+		// logged in, so both must be logged in the same scope as the CHECK below.
+		// Logging `expected` inside the `if` would drop it before the assertion
+		// runs. The messages are only printed when an assertion fails.
+		INFO(test.get_source_file());
+		INFO(expected);
+		if (!result.passed) {
+			failed++;
+		}
+
+		CHECK_MESSAGE(result.passed, (result.passed ? String() : result.output));
+	}
+
+	return failed;
+}
+
+// Runs every test script and writes its output to the matching output file.
+// Stops at the first script whose output cannot be generated.
+//
+// Returns true only when an output file was written for every test.
+bool GDScriptTestRunner::generate_outputs() {
+	// While generating, scripts without an existing output file are accepted.
+	is_generating = true;
+
+	if (!make_tests()) {
+		print_line("Failed to generate a test output.");
+		return false;
+	}
+	if (!generate_class_index()) {
+		return false;
+	}
+
+	for (int idx = 0; idx < tests.size(); idx++) {
+		// One dot per processed test as a progress indicator.
+		OS::get_singleton()->print(".");
+
+		GDScriptTest current = tests[idx];
+		if (current.generate_output()) {
+			continue;
+		}
+
+		print_line("\nCould not generate output for " + current.get_source_file());
+		return false;
+	}
+
+	print_line("\nGenerated output files for " + itos(tests.size()) + " tests successfully.");
+	return true;
+}
+
+// Recursively scans `p_dir` and adds one test for every `.gd` file found.
+// Unless outputs are being generated, each script must have a sibling `.out`
+// file with the same base name.
+//
+// Returns false if a directory cannot be opened or an output file is missing.
+bool GDScriptTestRunner::make_tests_for_dir(const String &p_dir) {
+	Error open_err = OK;
+	DirAccessRef dir(DirAccess::open(p_dir, &open_err));
+	if (open_err != OK) {
+		return false;
+	}
+
+	const String current_dir = dir->get_current_dir();
+
+	dir->list_dir_begin();
+	for (String entry = dir->get_next(); !entry.is_empty(); entry = dir->get_next()) {
+		if (dir->current_is_dir()) {
+			// Descend into sub-directories, ignoring the navigation entries.
+			const bool is_navigation_entry = entry == "." || entry == "..";
+			if (!is_navigation_entry && !make_tests_for_dir(current_dir.plus_file(entry))) {
+				return false;
+			}
+			continue;
+		}
+
+		if (entry.get_extension().to_lower() != "gd") {
+			// Only GDScript files are tests.
+			continue;
+		}
+
+		const String out_file = entry.get_basename() + ".out";
+		if (!is_generating && !dir->file_exists(out_file)) {
+			ERR_FAIL_V_MSG(false, "Could not find output file for " + entry);
+		}
+		tests.push_back(GDScriptTest(current_dir.plus_file(entry), current_dir.plus_file(out_file), source_dir));
+	}
+	dir->list_dir_end();
+
+	return true;
+}
+
+// Collects all the tests found under `source_dir`.
+// Returns false if the directory cannot be opened or the scan fails.
+bool GDScriptTestRunner::make_tests() {
+	Error err = OK;
+	DirAccessRef root_dir(DirAccess::open(source_dir, &err));
+
+	ERR_FAIL_COND_V_MSG(err != OK, false, "Could not open specified test directory.");
+
+	// Start the recursive scan from the directory as reported by the accessor.
+	const String start_path = root_dir->get_current_dir();
+	return make_tests_for_dir(start_path);
+}
+
+// Registers the global class declared by each test script (if any) in the
+// ScriptServer. Returns false when two scripts declare the same class name.
+bool GDScriptTestRunner::generate_class_index() {
+	GDScriptLanguage *language = GDScriptLanguage::get_singleton();
+	const StringName language_name = language->get_name();
+
+	for (int idx = 0; idx < tests.size(); idx++) {
+		GDScriptTest candidate = tests[idx];
+
+		String base_type;
+		String class_name = language->get_global_class_name(candidate.get_source_file(), &base_type);
+		if (class_name.is_empty()) {
+			// The script does not declare a global class.
+			continue;
+		}
+
+		ERR_FAIL_COND_V_MSG(ScriptServer::is_global_class(class_name), false,
+				"Class name '" + class_name + "' from " + candidate.get_source_file() + " is already used in " + ScriptServer::get_global_class_path(class_name));
+
+		ScriptServer::add_global_class(class_name, base_type, language_name, candidate.get_source_file());
+	}
+
+	return true;
+}
+
+// Creates a test for the script at `p_source_path`, whose expected output is
+// stored at `p_output_path`. `p_base_dir` is the prefix trimmed from file
+// paths in error reports.
+GDScriptTest::GDScriptTest(const String &p_source_path, const String &p_output_path, const String &p_base_dir) :
+		source_file(p_source_path),
+		output_file(p_output_path),
+		base_dir(p_base_dir) {
+	// Route print and error callbacks to the static handlers of this class.
+	_print_handler.printfunc = print_handler;
+	_error_handler.errfunc = error_handler;
+}
+
+// Looks for `--gdscript-test <path>` or `--gdscript-generate-tests <path>` in
+// the command line. On the first match it runs (or generates) the tests found
+// in `<path>` and exits the process; otherwise it simply returns.
+void GDScriptTestRunner::handle_cmdline() {
+	// TODO: this could likely be ported to use test commands:
+	// https://github.com/godotengine/godot/pull/41355
+	// Currently requires to startup the whole engine, which is slow.
+	const String test_cmd = "--gdscript-test";
+	const String gen_cmd = "--gdscript-generate-tests";
+
+	List<String> cmdline_args = OS::get_singleton()->get_cmdline_args();
+	for (List<String>::Element *arg = cmdline_args.front(); arg != nullptr; arg = arg->next()) {
+		const String &cmd = arg->get();
+		const bool wants_run = cmd == test_cmd;
+		const bool wants_generate = cmd == gen_cmd;
+		if (!wants_run && !wants_generate) {
+			continue;
+		}
+
+		// The argument following the command is the path to the test files.
+		List<String>::Element *path_arg = arg->next();
+		if (path_arg == nullptr) {
+			ERR_PRINT("Needed a path for the test files.");
+			exit(-1);
+		}
+
+		GDScriptTestRunner runner(path_arg->get(), false);
+
+		// Exit code: number of failed tests when running; 0 or -1 when generating.
+		int exit_code = 0;
+		if (wants_run) {
+			exit_code = runner.run_tests();
+		} else if (!runner.generate_outputs()) {
+			exit_code = -1;
+		}
+		exit(exit_code);
+	}
+}
+
+void GDScriptTest::enable_stdout() {
+	// TODO: this could likely be handled by doctest or `tests/test_macros.h`.
+	OS::get_singleton()->set_stdout_enabled(true);
+	OS::get_singleton()->set_stderr_enabled(true);
+}
+
+void GDScriptTest::disable_stdout() {
+	// TODO: this could likely be handled by doctest or `tests/test_macros.h`.
+	OS::get_singleton()->set_stdout_enabled(false);
+	OS::get_singleton()->set_stderr_enabled(false);
+}
+
+// Print callback: appends `p_message` plus a newline to the output of the
+// TestResult passed as user data. `p_error` is not used.
+void GDScriptTest::print_handler(void *p_this, const String &p_message, bool p_error) {
+	TestResult *sink = static_cast<TestResult *>(p_this);
+	sink->output += p_message + "\n";
+}
+
+void GDScriptTest::error_handler(void *p_this, const char *p_function, const char *p_file, int p_line, const char *p_error, const char *p_explanation, ErrorHandlerType p_type) {
+	ErrorHandlerData *data = (ErrorHandlerData *)p_this;
+	GDScriptTest *self = data->self;
+	TestResult *result = data->result;
+
+	result->status = GDTEST_RUNTIME_ERROR;
+
+	StringBuilder builder;
+	builder.append(">> ");
+	switch (p_type) {
+		case ERR_HANDLER_ERROR:
+			builder.append("ERROR");
+			break;
+		case ERR_HANDLER_WARNING:
+			builder.append("WARNING");
+			break;
+		case ERR_HANDLER_SCRIPT:
+			builder.append("SCRIPT ERROR");
+			break;
+		case ERR_HANDLER_SHADER:
+			builder.append("SHADER ERROR");
+			break;
+		default:
+			builder.append("Unknown error type");
+			break;
+	}
+
+	builder.append("\n>> ");
+	builder.append(p_function);
+	builder.append("\n>> ");
+	builder.append(p_function);
+	builder.append("\n>> ");
+	builder.append(String(p_file).trim_prefix(self->base_dir));
+	builder.append("\n>> ");
+	builder.append(itos(p_line));
+	builder.append("\n>> ");
+	builder.append(p_error);
+	if (strlen(p_explanation) > 0) {
+		builder.append("\n>> ");
+		builder.append(p_explanation);
+	}
+	builder.append("\n");
+
+	result->output = builder.as_string();
+}
+
+// Compares `p_output` against the content of the expected output file.
+// The output is stripped at both edges and terminated by exactly one newline
+// before the comparison. Returns false if the file cannot be read.
+bool GDScriptTest::check_output(const String &p_output) const {
+	Error err = OK;
+	const String expected = FileAccess::get_file_as_string(output_file, &err);
+
+	ERR_FAIL_COND_V_MSG(err != OK, false, "Error when opening the output file.");
+
+	// TODO: stripping may be hacky. The trailing newline is inserted for the
+	// CI static checks.
+	const String got = p_output.strip_edges() + "\n";
+
+	return got == expected;
+}
+
+// Returns the enumerator name of `p_status` as text, or an empty string for a
+// value that is not one of the known statuses.
+String GDScriptTest::get_text_for_status(GDScriptTest::TestStatus p_status) const {
+	const char *status_text = "";
+
+	switch (p_status) {
+		case GDTEST_OK:
+			status_text = "GDTEST_OK";
+			break;
+		case GDTEST_LOAD_ERROR:
+			status_text = "GDTEST_LOAD_ERROR";
+			break;
+		case GDTEST_PARSER_ERROR:
+			status_text = "GDTEST_PARSER_ERROR";
+			break;
+		case GDTEST_ANALYZER_ERROR:
+			status_text = "GDTEST_ANALYZER_ERROR";
+			break;
+		case GDTEST_COMPILER_ERROR:
+			status_text = "GDTEST_COMPILER_ERROR";
+			break;
+		case GDTEST_RUNTIME_ERROR:
+			status_text = "GDTEST_RUNTIME_ERROR";
+			break;
+	}
+
+	return status_text;
+}
+
+// Loads, parses, analyzes, compiles and finally runs the test script.
+//
+// The returned result holds the status of the stage that ended the test and
+// the text produced for it: the status name on the first line, followed by the
+// first parser/analyzer error, the compiler error, or the warnings and
+// whatever the script printed (or the runtime error report).
+//
+// When `p_is_generating` is false, `passed` tells whether that text matches
+// the expected output file. When it is true, no comparison is made and
+// `passed` stays false.
+//
+// Standard output is disabled while the test runs and enabled again before
+// returning.
+GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) {
+	disable_stdout();
+
+	TestResult result;
+	result.status = GDTEST_OK;
+	result.output = String();
+	// Never return an indeterminate value, even on paths that skip the check.
+	result.passed = false;
+
+	Error err = OK;
+
+	// Create script.
+	Ref<GDScript> script;
+	script.instance();
+	script->set_path(source_file);
+	script->set_script_path(source_file);
+	err = script->load_source_code(source_file);
+	if (err != OK) {
+		enable_stdout();
+		result.status = GDTEST_LOAD_ERROR;
+		result.passed = false;
+		ERR_FAIL_V_MSG(result, "\nCould not load source code for: '" + source_file + "'");
+	}
+
+	// Test parsing.
+	GDScriptParser parser;
+	err = parser.parse(script->get_source_code(), source_file, false);
+	if (err != OK) {
+		enable_stdout();
+		result.status = GDTEST_PARSER_ERROR;
+		result.output = get_text_for_status(result.status) + "\n";
+
+		// Only the first error since the following might be cascading.
+		const List<GDScriptParser::ParserError>::Element *first_error = parser.get_errors().front();
+		if (first_error != nullptr) {
+			result.output += first_error->get().message + "\n"; // TODO: line, column?
+		}
+		if (!p_is_generating) {
+			result.passed = check_output(result.output);
+		}
+		return result;
+	}
+
+	// Test type-checking.
+	GDScriptAnalyzer analyzer(&parser);
+	err = analyzer.analyze();
+	if (err != OK) {
+		enable_stdout();
+		result.status = GDTEST_ANALYZER_ERROR;
+		result.output = get_text_for_status(result.status) + "\n";
+
+		// Analyzer errors are collected by the parser as well; again only the
+		// first one is reported.
+		const List<GDScriptParser::ParserError>::Element *first_error = parser.get_errors().front();
+		if (first_error != nullptr) {
+			result.output += first_error->get().message + "\n"; // TODO: line, column?
+		}
+		if (!p_is_generating) {
+			result.passed = check_output(result.output);
+		}
+		return result;
+	}
+
+	// Warnings become part of the output so they can be tested too.
+	StringBuilder warning_string;
+	for (const List<GDScriptWarning>::Element *E = parser.get_warnings().front(); E != nullptr; E = E->next()) {
+		const GDScriptWarning warning = E->get();
+		warning_string.append(">> WARNING");
+		warning_string.append("\n>> Line: ");
+		warning_string.append(itos(warning.start_line));
+		warning_string.append("\n>> ");
+		warning_string.append(warning.get_name());
+		warning_string.append("\n>> ");
+		warning_string.append(warning.get_message());
+		warning_string.append("\n");
+	}
+	result.output += warning_string.as_string();
+
+	// Test compiling.
+	GDScriptCompiler compiler;
+	err = compiler.compile(&parser, script.ptr(), false);
+	if (err != OK) {
+		enable_stdout();
+		result.status = GDTEST_COMPILER_ERROR;
+		result.output = get_text_for_status(result.status) + "\n";
+		// Append the error: assigning it would wipe the status line written above.
+		result.output += compiler.get_error();
+		if (!p_is_generating) {
+			result.passed = check_output(result.output);
+		}
+		return result;
+	}
+
+	// Test running.
+	const Map<StringName, GDScriptFunction *>::Element *test_function_element = script->get_member_functions().find(GDScriptTestRunner::test_function_name);
+	if (test_function_element == nullptr) {
+		enable_stdout();
+		result.status = GDTEST_LOAD_ERROR;
+		result.output = "";
+		result.passed = false;
+		ERR_FAIL_V_MSG(result, "\nCould not find test function on: '" + source_file + "'");
+	}
+
+	script->reload();
+
+	// Create object instance for test.
+	Object *obj = ClassDB::instance(script->get_native()->get_name());
+	if (obj == nullptr) {
+		// The native base class could not be instanced; nothing to run the test on.
+		enable_stdout();
+		result.status = GDTEST_LOAD_ERROR;
+		result.output = "";
+		result.passed = false;
+		ERR_FAIL_V_MSG(result, "\nCould not instance base class for: '" + source_file + "'");
+	}
+	// Reference-counted objects are owned (and freed) by `obj_ref`;
+	// anything else has to be deleted manually before returning.
+	Ref<Reference> obj_ref;
+	if (obj->is_reference()) {
+		obj_ref = Ref<Reference>(Object::cast_to<Reference>(obj));
+	}
+	obj->set_script(script);
+	GDScriptInstance *instance = static_cast<GDScriptInstance *>(obj->get_script_instance());
+
+	// Setup output handlers.
+	ErrorHandlerData error_data(&result, this);
+
+	_print_handler.userdata = &result;
+	_error_handler.userdata = &error_data;
+	add_print_handler(&_print_handler);
+	add_error_handler(&_error_handler);
+
+	// Call test function.
+	Callable::CallError call_err;
+	instance->call(GDScriptTestRunner::test_function_name, nullptr, 0, call_err);
+
+	// Tear down output handlers.
+	remove_print_handler(&_print_handler);
+	remove_error_handler(&_error_handler);
+
+	// Check results.
+	if (call_err.error != Callable::CallError::CALL_OK) {
+		enable_stdout();
+		result.status = GDTEST_LOAD_ERROR;
+		result.passed = false;
+		// Do not leak the test object on this early exit.
+		if (obj_ref.is_null()) {
+			memdelete(obj);
+		}
+		ERR_FAIL_V_MSG(result, "\nCould not call test function on: '" + source_file + "'");
+	}
+
+	result.output = get_text_for_status(result.status) + "\n" + result.output;
+	if (!p_is_generating) {
+		result.passed = check_output(result.output);
+	}
+
+	if (obj_ref.is_null()) {
+		memdelete(obj);
+	}
+
+	enable_stdout();
+	return result;
+}
+
+// Executes the test and compares its output with the expected output file.
+GDScriptTest::TestResult GDScriptTest::run_test() {
+	const bool generating = false;
+	return execute_test_code(generating);
+}
+
+// Executes the test and writes what it produced to the output file.
+// Returns false if the test ended with a load error or the file cannot be
+// opened for writing.
+bool GDScriptTest::generate_output() {
+	const TestResult generated = execute_test_code(true);
+	if (generated.status == GDTEST_LOAD_ERROR) {
+		return false;
+	}
+
+	Error open_err = OK;
+	FileAccessRef out_file = FileAccess::open(output_file, FileAccess::WRITE, &open_err);
+	if (open_err != OK) {
+		return false;
+	}
+
+	// TODO: stripping may be hacky. The trailing newline is inserted for the
+	// CI static checks.
+	const String output = generated.output.strip_edges() + "\n";
+
+	out_file->store_string(output);
+	out_file->close();
+
+	return true;
+}
+
+} // namespace GDScriptTests
diff --git a/modules/gdscript/tests/gdscript_test_runner.h b/modules/gdscript/tests/gdscript_test_runner.h
new file mode 100644
index 0000000000..9b2d14a371
--- /dev/null
+++ b/modules/gdscript/tests/gdscript_test_runner.h
@@ -0,0 +1,126 @@
+/*************************************************************************/
+/*  gdscript_test_runner.h                                               */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef GDSCRIPT_TEST_H
+#define GDSCRIPT_TEST_H
+
+#include "../gdscript.h"
+#include "core/error/error_macros.h"
+#include "core/string/print_string.h"
+#include "core/string/ustring.h"
+#include "core/templates/vector.h"
+
+namespace GDScriptTests {
+
+void init_autoloads(); // Registers every singleton autoload as a global constant in all script languages.
+void init_language(const String &p_base_path); // Sets up ProjectSettings at p_base_path, then initializes GDScript and the autoloads.
+void finish_language(); // Finishes the GDScript language and clears the global class list.
+
+// Single test instance in a suite.
+class GDScriptTest {
+public:
+	// How a test ended. GDTEST_LOAD_ERROR is also used when the test function
+	// is missing or cannot be called.
+	enum TestStatus {
+		GDTEST_OK,
+		GDTEST_LOAD_ERROR,
+		GDTEST_PARSER_ERROR,
+		GDTEST_ANALYZER_ERROR,
+		GDTEST_COMPILER_ERROR,
+		GDTEST_RUNTIME_ERROR,
+	};
+
+	// Outcome of executing a test. The members have default values so that a
+	// default-constructed result never carries indeterminate data.
+	struct TestResult {
+		TestStatus status = GDTEST_OK;
+		// Text produced by the test: status name, errors, warnings and prints.
+		String output;
+		// Whether `output` matched the expected output file.
+		bool passed = false;
+	};
+
+private:
+	// User data handed to error_handler(): the result to fill in and the test
+	// that is currently running.
+	struct ErrorHandlerData {
+		TestResult *result;
+		GDScriptTest *self;
+		ErrorHandlerData(TestResult *p_result, GDScriptTest *p_this) {
+			result = p_result;
+			self = p_this;
+		}
+	};
+
+	// Script to test.
+	String source_file;
+	// File holding the expected output of the script.
+	String output_file;
+	// Prefix trimmed from file paths in error reports.
+	String base_dir;
+
+	// Handlers registered while the test function runs.
+	PrintHandlerList _print_handler;
+	ErrorHandlerList _error_handler;
+
+	void enable_stdout();
+	void disable_stdout();
+	// Compares `p_output` with the content of `output_file`.
+	bool check_output(const String &p_output) const;
+	String get_text_for_status(TestStatus p_status) const;
+
+	// Shared implementation of run_test() and generate_output().
+	TestResult execute_test_code(bool p_is_generating);
+
+public:
+	static void print_handler(void *p_this, const String &p_message, bool p_error);
+	static void error_handler(void *p_this, const char *p_function, const char *p_file, int p_line, const char *p_error, const char *p_explanation, ErrorHandlerType p_type);
+	// Runs the test and checks its output against `output_file`.
+	TestResult run_test();
+	// Runs the test and writes its output to `output_file`.
+	bool generate_output();
+
+	const String &get_source_file() const { return source_file; }
+	const String &get_output_file() const { return output_file; }
+
+	GDScriptTest(const String &p_source_path, const String &p_output_path, const String &p_base_dir);
+	GDScriptTest() :
+			GDScriptTest(String(), String(), String()) {} // Needed to use in Vector.
+};
+
+// Collects the test scripts found under a directory and runs them, or
+// generates their expected output files.
+class GDScriptTestRunner {
+private:
+	// Directory scanned for tests; the constructor ensures a trailing "/".
+	String source_dir;
+	// Tests collected by make_tests().
+	Vector<GDScriptTest> tests;
+
+	// Set by generate_outputs(); scripts without an output file are then accepted.
+	bool is_generating = false;
+	// Whether this runner initialized the language and must finish it when destroyed.
+	bool do_init_languages = false;
+
+	bool make_tests();
+	bool make_tests_for_dir(const String &p_dir);
+	bool generate_class_index();
+
+public:
+	// Name of the function called in every test script.
+	static StringName test_function_name;
+
+	// Handles the `--gdscript-test` and `--gdscript-generate-tests` arguments.
+	static void handle_cmdline();
+	// Returns the number of failed tests, or -1 if they could not be set up.
+	int run_tests();
+	// Returns true when an output file was written for every test.
+	bool generate_outputs();
+
+	GDScriptTestRunner(const String &p_source_dir, bool p_init_language);
+	~GDScriptTestRunner();
+};
+
+} // namespace GDScriptTests
+
+#endif // GDSCRIPT_TEST_H
diff --git a/modules/etc/texture_loader_pkm.h b/modules/gdscript/tests/gdscript_test_runner_suite.h
index 2ed5e75807..136907b316 100644
--- a/modules/etc/texture_loader_pkm.h
+++ b/modules/gdscript/tests/gdscript_test_runner_suite.h
@@ -1,5 +1,5 @@
 /*************************************************************************/
-/*  texture_loader_pkm.h                                                 */
+/*  gdscript_test_runner_suite.h                                         */
 /*************************************************************************/
 /*                       This file is part of:                           */
 /*                           GODOT ENGINE                                */
@@ -28,20 +28,26 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#ifndef TEXTURE_LOADER_PKM_H
-#define TEXTURE_LOADER_PKM_H
+#ifndef GDSCRIPT_TEST_RUNNER_SUITE_H
+#define GDSCRIPT_TEST_RUNNER_SUITE_H
 
-#include "core/io/resource_loader.h"
-#include "scene/resources/texture.h"
+#include "gdscript_test_runner.h"
+#include "tests/test_macros.h"
 
-class ResourceFormatPKM : public ResourceFormatLoader {
-public:
-	virtual RES load(const String &p_path, const String &p_original_path = "", Error *r_error = nullptr, bool p_use_sub_threads = false, float *r_progress = nullptr, CacheMode p_cache_mode = CACHE_MODE_REUSE);
-	virtual void get_recognized_extensions(List<String> *p_extensions) const;
-	virtual bool handles_type(const String &p_type) const;
-	virtual String get_resource_type(const String &p_path) const;
+namespace GDScriptTests {
 
-	virtual ~ResourceFormatPKM() {}
-};
+TEST_SUITE("[Modules][GDScript]") {
+	// GDScript 2.0 is still under heavy construction.
+	// Allow the tests to fail, but do not ignore errors during development.
+	// Update the scripts and expected output as needed.
+	TEST_CASE("Script compilation and runtime") {
+		GDScriptTestRunner runner("modules/gdscript/tests/scripts", true); // `true`: the runner initializes (and later finishes) the GDScript language itself.
+		int fail_count = runner.run_tests(); // Number of failed scripts, or -1 if the tests could not be set up.
+		INFO("Make sure `*.out` files have expected results.");
+		REQUIRE_MESSAGE(fail_count == 0, "All GDScript tests should pass.");
+	}
+}
 
-#endif // TEXTURE_LOADER_PKM_H
+} // namespace GDScriptTests
+
+#endif // GDSCRIPT_TEST_RUNNER_SUITE_H
diff --git a/modules/gdscript/tests/scripts/.gitignore b/modules/gdscript/tests/scripts/.gitignore
new file mode 100644
index 0000000000..94c5b1bf6b
--- /dev/null
+++ b/modules/gdscript/tests/scripts/.gitignore
@@ -0,0 +1,2 @@
+# Ignore metadata if someone opens this in Godot.
+/.godot
diff --git a/modules/gdscript/tests/scripts/parser-errors/missing-argument.gd b/modules/gdscript/tests/scripts/parser-errors/missing-argument.gd
new file mode 100644
index 0000000000..c56ad94095
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/missing-argument.gd
@@ -0,0 +1,6 @@
+func args(a, b):
+    print(a)
+    print(b)
+
+func test():
+    args(1,)
diff --git a/modules/gdscript/tests/scripts/parser-errors/missing-argument.out b/modules/gdscript/tests/scripts/parser-errors/missing-argument.out
new file mode 100644
index 0000000000..fc2a891109
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/missing-argument.out
@@ -0,0 +1,2 @@
+GDTEST_ANALYZER_ERROR
+Too few arguments for "args()" call. Expected at least 2 but received 1.
diff --git a/modules/gdscript/tests/scripts/parser-errors/missing-closing-expr-paren.gd b/modules/gdscript/tests/scripts/parser-errors/missing-closing-expr-paren.gd
new file mode 100644
index 0000000000..a1077e1985
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/missing-closing-expr-paren.gd
@@ -0,0 +1,2 @@
+func test():
+    var a = ("missing paren ->"
diff --git a/modules/gdscript/tests/scripts/parser-errors/missing-closing-expr-paren.out b/modules/gdscript/tests/scripts/parser-errors/missing-closing-expr-paren.out
new file mode 100644
index 0000000000..7326afa33d
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/missing-closing-expr-paren.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Expected closing ")" after grouping expression.
diff --git a/modules/gdscript/tests/scripts/parser-errors/missing-colon.gd b/modules/gdscript/tests/scripts/parser-errors/missing-colon.gd
new file mode 100644
index 0000000000..62cb633e9e
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/missing-colon.gd
@@ -0,0 +1,3 @@
+func test():
+    if true # Missing colon here.
+        print("true")
diff --git a/modules/gdscript/tests/scripts/parser-errors/missing-colon.out b/modules/gdscript/tests/scripts/parser-errors/missing-colon.out
new file mode 100644
index 0000000000..687b963bc8
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/missing-colon.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Expected ":" after "if" condition.
diff --git a/modules/gdscript/tests/scripts/parser-errors/missing-paren-after-args.gd b/modules/gdscript/tests/scripts/parser-errors/missing-paren-after-args.gd
new file mode 100644
index 0000000000..116b0151da
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/missing-paren-after-args.gd
@@ -0,0 +1,6 @@
+func args(a, b):
+    print(a)
+    print(b)
+
+func test():
+    args(1,2
diff --git a/modules/gdscript/tests/scripts/parser-errors/missing-paren-after-args.out b/modules/gdscript/tests/scripts/parser-errors/missing-paren-after-args.out
new file mode 100644
index 0000000000..34ea7ac323
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/missing-paren-after-args.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Expected closing ")" after call arguments.
diff --git a/modules/gdscript/tests/scripts/parser-errors/mixing-tabs-spaces.gd b/modules/gdscript/tests/scripts/parser-errors/mixing-tabs-spaces.gd
new file mode 100644
index 0000000000..9ad77f1432
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/mixing-tabs-spaces.gd
@@ -0,0 +1,3 @@
+func test():
+    print("Using spaces")
+	print("Using tabs")
diff --git a/modules/gdscript/tests/scripts/parser-errors/mixing-tabs-spaces.out b/modules/gdscript/tests/scripts/parser-errors/mixing-tabs-spaces.out
new file mode 100644
index 0000000000..6390de9788
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/mixing-tabs-spaces.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Used "\t" for indentation instead " " as used before in the file.
diff --git a/modules/gdscript/tests/scripts/parser-errors/nothing-after-dollar.gd b/modules/gdscript/tests/scripts/parser-errors/nothing-after-dollar.gd
new file mode 100644
index 0000000000..3875ce3936
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/nothing-after-dollar.gd
@@ -0,0 +1,3 @@
+extends Node
+func test():
+    var a = $ # Expected some node path.
diff --git a/modules/gdscript/tests/scripts/parser-errors/nothing-after-dollar.out b/modules/gdscript/tests/scripts/parser-errors/nothing-after-dollar.out
new file mode 100644
index 0000000000..b3dc181a22
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/nothing-after-dollar.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Expect node path as string or identifier after "$".
diff --git a/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar-slash.gd b/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar-slash.gd
new file mode 100644
index 0000000000..1836d42226
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar-slash.gd
@@ -0,0 +1,3 @@
+extends Node
+func test():
+    $MyNode/23 # Can't use number here.
diff --git a/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar-slash.out b/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar-slash.out
new file mode 100644
index 0000000000..dcb4ccecb0
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar-slash.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Expect node path after "/".
diff --git a/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar.gd b/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar.gd
new file mode 100644
index 0000000000..6fd2692d47
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar.gd
@@ -0,0 +1,3 @@
+extends Node
+func test():
+    $23 # Can't use number here.
diff --git a/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar.out b/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar.out
new file mode 100644
index 0000000000..b3dc181a22
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-errors/wrong-value-after-dollar.out
@@ -0,0 +1,2 @@
+GDTEST_PARSER_ERROR
+Expect node path as string or identifier after "$".
diff --git a/modules/gdscript/tests/scripts/parser-features/semicolon-as-end-statement.gd b/modules/gdscript/tests/scripts/parser-features/semicolon-as-end-statement.gd
new file mode 100644
index 0000000000..08f2eedb2d
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-features/semicolon-as-end-statement.gd
@@ -0,0 +1,2 @@
+func test():
+    print("A"); print("B")
diff --git a/modules/gdscript/tests/scripts/parser-features/semicolon-as-end-statement.out b/modules/gdscript/tests/scripts/parser-features/semicolon-as-end-statement.out
new file mode 100644
index 0000000000..fc03f3efe8
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-features/semicolon-as-end-statement.out
@@ -0,0 +1,3 @@
+GDTEST_OK
+A
+B
diff --git a/modules/gdscript/tests/scripts/parser-features/trailing-comma-in-function-args.gd b/modules/gdscript/tests/scripts/parser-features/trailing-comma-in-function-args.gd
new file mode 100644
index 0000000000..6097b11b10
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-features/trailing-comma-in-function-args.gd
@@ -0,0 +1,7 @@
+# See https://github.com/godotengine/godot/issues/41066.
+
+func f(p, ): ## <-- no errors
+	print(p)
+
+func test():
+	f(0, ) ## <-- no error
diff --git a/modules/gdscript/tests/scripts/parser-features/trailing-comma-in-function-args.out b/modules/gdscript/tests/scripts/parser-features/trailing-comma-in-function-args.out
new file mode 100644
index 0000000000..94e2ec2af8
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-features/trailing-comma-in-function-args.out
@@ -0,0 +1,2 @@
+GDTEST_OK
+0
diff --git a/modules/gdscript/tests/scripts/parser-features/variable-declaration.gd b/modules/gdscript/tests/scripts/parser-features/variable-declaration.gd
new file mode 100644
index 0000000000..3b48f10ca7
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-features/variable-declaration.gd
@@ -0,0 +1,12 @@
+var a # No init.
+var b = 42 # Init.
+
+func test():
+	var c # No init, local.
+	var d = 23 # Init, local.
+
+	a = 1
+	c = 2
+
+	prints(a, b, c, d)
+	print("OK")
diff --git a/modules/gdscript/tests/scripts/parser-features/variable-declaration.out b/modules/gdscript/tests/scripts/parser-features/variable-declaration.out
new file mode 100644
index 0000000000..2e0a63c024
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-features/variable-declaration.out
@@ -0,0 +1,7 @@
+GDTEST_OK
+>> WARNING
+>> Line: 5
+>> UNASSIGNED_VARIABLE
+>> The variable 'c' was used but never assigned a value.
+1 42 2 23
+OK
diff --git a/modules/gdscript/tests/scripts/parser-warnings/unused-variable.gd b/modules/gdscript/tests/scripts/parser-warnings/unused-variable.gd
new file mode 100644
index 0000000000..68e3bd424f
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-warnings/unused-variable.gd
@@ -0,0 +1,2 @@
+func test():
+    var unused = "not used"
diff --git a/modules/gdscript/tests/scripts/parser-warnings/unused-variable.out b/modules/gdscript/tests/scripts/parser-warnings/unused-variable.out
new file mode 100644
index 0000000000..270e0e69c0
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser-warnings/unused-variable.out
@@ -0,0 +1,5 @@
+GDTEST_OK
+>> WARNING
+>> Line: 2
+>> UNUSED_VARIABLE
+>> The local variable 'unused' is declared but never used in the block. If this is intended, prefix it with an underscore: '_unused'
diff --git a/modules/gdscript/tests/scripts/project.godot b/modules/gdscript/tests/scripts/project.godot
new file mode 100644
index 0000000000..25b49c0abd
--- /dev/null
+++ b/modules/gdscript/tests/scripts/project.godot
@@ -0,0 +1,10 @@
+; This is not an actual project.
+; This config only exists to properly set up the test environment.
+; It also makes it possible to open Godot to edit the scripts, but please
+; don't save any changes made by the editor.
+
+config_version=4
+
+[application]
+
+config/name="GDScript Integration Test Suite"
diff --git a/modules/gdscript/tests/test_gdscript.cpp b/modules/gdscript/tests/test_gdscript.cpp
index 3cc0eee672..e70f221c0a 100644
--- a/modules/gdscript/tests/test_gdscript.cpp
+++ b/modules/gdscript/tests/test_gdscript.cpp
@@ -47,7 +47,7 @@
 #include "editor/editor_settings.h"
 #endif
 
-namespace TestGDScript {
+namespace GDScriptTests {
 
 static void test_tokenizer(const String &p_code, const Vector<String> &p_lines) {
 	GDScriptTokenizer tokenizer;
@@ -183,60 +183,6 @@ static void test_compiler(const String &p_code, const String &p_script_path, con
 	}
 }
 
-void init_autoloads() {
-	Map<StringName, ProjectSettings::AutoloadInfo> autoloads = ProjectSettings::get_singleton()->get_autoload_list();
-
-	// First pass, add the constants so they exist before any script is loaded.
-	for (Map<StringName, ProjectSettings::AutoloadInfo>::Element *E = autoloads.front(); E; E = E->next()) {
-		const ProjectSettings::AutoloadInfo &info = E->get();
-
-		if (info.is_singleton) {
-			for (int i = 0; i < ScriptServer::get_language_count(); i++) {
-				ScriptServer::get_language(i)->add_global_constant(info.name, Variant());
-			}
-		}
-	}
-
-	// Second pass, load into global constants.
-	for (Map<StringName, ProjectSettings::AutoloadInfo>::Element *E = autoloads.front(); E; E = E->next()) {
-		const ProjectSettings::AutoloadInfo &info = E->get();
-
-		if (!info.is_singleton) {
-			// Skip non-singletons since we don't have a scene tree here anyway.
-			continue;
-		}
-
-		RES res = ResourceLoader::load(info.path);
-		ERR_CONTINUE_MSG(res.is_null(), "Can't autoload: " + info.path);
-		Node *n = nullptr;
-		if (res->is_class("PackedScene")) {
-			Ref<PackedScene> ps = res;
-			n = ps->instance();
-		} else if (res->is_class("Script")) {
-			Ref<Script> script_res = res;
-			StringName ibt = script_res->get_instance_base_type();
-			bool valid_type = ClassDB::is_parent_class(ibt, "Node");
-			ERR_CONTINUE_MSG(!valid_type, "Script does not inherit a Node: " + info.path);
-
-			Object *obj = ClassDB::instance(ibt);
-
-			ERR_CONTINUE_MSG(obj == nullptr,
-					"Cannot instance script for autoload, expected 'Node' inheritance, got: " +
-							String(ibt));
-
-			n = Object::cast_to<Node>(obj);
-			n->set_script(script_res);
-		}
-
-		ERR_CONTINUE_MSG(!n, "Path in autoload not a node or script: " + info.path);
-		n->set_name(info.name);
-
-		for (int i = 0; i < ScriptServer::get_language_count(); i++) {
-			ScriptServer::get_language(i)->add_global_constant(info.name, n);
-		}
-	}
-}
-
 void test(TestType p_type) {
 	List<String> cmdlargs = OS::get_singleton()->get_cmdline_args();
 
@@ -253,20 +199,8 @@ void test(TestType p_type) {
 	FileAccessRef fa = FileAccess::open(test, FileAccess::READ);
 	ERR_FAIL_COND_MSG(!fa, "Could not open file: " + test);
 
-	// Init PackedData since it's used by ProjectSettings.
-	PackedData *packed_data = memnew(PackedData);
-
-	// Setup project settings since it's needed by the languages to get the global scripts.
-	// This also sets up the base resource path.
-	Error err = ProjectSettings::get_singleton()->setup(fa->get_path_absolute().get_base_dir(), String(), true);
-	if (err) {
-		print_line("Could not load project settings.");
-		// Keep going since some scripts still work without this.
-	}
-
 	// Initialize the language for the test routine.
-	ScriptServer::init_languages();
-	init_autoloads();
+	init_language(fa->get_path_absolute().get_base_dir());
 
 	Vector<uint8_t> buf;
 	int flen = fa->get_len();
@@ -300,8 +234,6 @@ void test(TestType p_type) {
 			print_line("Not implemented.");
 	}
 
-	// Destroy stuff we set up earlier.
-	ScriptServer::finish_languages();
-	memdelete(packed_data);
+	finish_language();
 }
-} // namespace TestGDScript
+} // namespace GDScriptTests
diff --git a/modules/gdscript/tests/test_gdscript.h b/modules/gdscript/tests/test_gdscript.h
index bbda46cdad..c7ee5a2208 100644
--- a/modules/gdscript/tests/test_gdscript.h
+++ b/modules/gdscript/tests/test_gdscript.h
@@ -31,7 +31,10 @@
 #ifndef TEST_GDSCRIPT_H
 #define TEST_GDSCRIPT_H
 
-namespace TestGDScript {
+#include "gdscript_test_runner.h"
+#include "tests/test_macros.h"
+
+namespace GDScriptTests {
 
 enum TestType {
 	TEST_TOKENIZER,
@@ -41,6 +44,7 @@ enum TestType {
 };
 
 void test(TestType p_type);
-} // namespace TestGDScript
+
+} // namespace GDScriptTests
 
 #endif // TEST_GDSCRIPT_H
diff --git a/modules/gltf/gltf_document.cpp b/modules/gltf/gltf_document.cpp
index 0b70175a24..027a054b70 100644
--- a/modules/gltf/gltf_document.cpp
+++ b/modules/gltf/gltf_document.cpp
@@ -3293,6 +3293,7 @@ Error GLTFDocument::_serialize_materials(Ref<GLTFState> state) {
 				}
 				img->decompress();
 				img->convert(Image::FORMAT_RGBA8);
+				img->convert_ra_rgba8_to_rg();
 				for (int32_t y = 0; y < img->get_height(); y++) {
 					for (int32_t x = 0; x < img->get_width(); x++) {
 						Color c = img->get_pixel(x, y);
@@ -4958,8 +4959,8 @@ GLTFMeshIndex GLTFDocument::_convert_mesh_instance(Ref<GLTFState> state, MeshIns
 		if (godot_array_mesh.is_valid()) {
 			surface_name = godot_array_mesh->surface_get_name(surface_i);
 		}
-		if (p_mesh_instance->get_surface_material(surface_i).is_valid()) {
-			mat = p_mesh_instance->get_surface_material(surface_i);
+		if (p_mesh_instance->get_surface_override_material(surface_i).is_valid()) {
+			mat = p_mesh_instance->get_surface_override_material(surface_i);
 		}
 		if (p_mesh_instance->get_material_override().is_valid()) {
 			mat = p_mesh_instance->get_material_override();
diff --git a/modules/lightmapper_rd/lm_blendseams.glsl b/modules/lightmapper_rd/lm_blendseams.glsl
index e47e5fcc51..374c48082e 100644
--- a/modules/lightmapper_rd/lm_blendseams.glsl
+++ b/modules/lightmapper_rd/lm_blendseams.glsl
@@ -7,7 +7,7 @@ triangles = "#define MODE_TRIANGLES";
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #include "lm_common_inc.glsl"
 
@@ -74,7 +74,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #include "lm_common_inc.glsl"
 
diff --git a/modules/lightmapper_rd/lm_compute.glsl b/modules/lightmapper_rd/lm_compute.glsl
index eb9d817f99..3dd96893fb 100644
--- a/modules/lightmapper_rd/lm_compute.glsl
+++ b/modules/lightmapper_rd/lm_compute.glsl
@@ -10,7 +10,7 @@ light_probes = "#define MODE_LIGHT_PROBES";
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 // One 2D local group focusing in one layer at a time, though all
 // in parallel (no barriers) makes more sense than a 3D local group
diff --git a/modules/lightmapper_rd/lm_raster.glsl b/modules/lightmapper_rd/lm_raster.glsl
index 6c2904192b..55ca193cc1 100644
--- a/modules/lightmapper_rd/lm_raster.glsl
+++ b/modules/lightmapper_rd/lm_raster.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #include "lm_common_inc.glsl"
 
@@ -56,7 +56,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #include "lm_common_inc.glsl"
 
diff --git a/modules/mono/mono_gd/gd_mono_wasm_m2n.h b/modules/mono/mono_gd/gd_mono_wasm_m2n.h
index 159a2ed7b6..366662ff81 100644
--- a/modules/mono/mono_gd/gd_mono_wasm_m2n.h
+++ b/modules/mono/mono_gd/gd_mono_wasm_m2n.h
@@ -176,7 +176,7 @@ T m2n_arg_cast(Mono_InterpMethodArguments *p_margs, size_t p_idx) {
 	} else if constexpr (cookie == 'F') {
 		return *reinterpret_cast<float *>(&p_margs->fargs[fidx(p_idx)]);
 	} else if constexpr (cookie == 'D') {
-		return (T)(size_t)p_margs->fargs[p_idx];
+		return (T)p_margs->fargs[p_idx];
 	}
 }
 
diff --git a/modules/squish/image_compress_squish.cpp b/modules/squish/image_decompress_squish.cpp
index cce08034df..1450b0fe88 100644
--- a/modules/squish/image_compress_squish.cpp
+++ b/modules/squish/image_decompress_squish.cpp
@@ -1,5 +1,5 @@
 /*************************************************************************/
-/*  image_compress_squish.cpp                                            */
+/*  image_decompress_squish.cpp                                          */
 /*************************************************************************/
 /*                       This file is part of:                           */
 /*                           GODOT ENGINE                                */
@@ -28,7 +28,7 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#include "image_compress_squish.h"
+#include "image_decompress_squish.h"
 
 #include <squish.h>
 
@@ -76,83 +76,3 @@ void image_decompress_squish(Image *p_image) {
 		p_image->convert_ra_rgba8_to_rg();
 	}
 }
-
-void image_compress_squish(Image *p_image, float p_lossy_quality, Image::UsedChannels p_channels) {
-	if (p_image->get_format() >= Image::FORMAT_DXT1) {
-		return; //do not compress, already compressed
-	}
-
-	int w = p_image->get_width();
-	int h = p_image->get_height();
-
-	if (p_image->get_format() <= Image::FORMAT_RGBA8) {
-		int squish_comp = squish::kColourRangeFit;
-
-		if (p_lossy_quality > 0.85) {
-			squish_comp = squish::kColourIterativeClusterFit;
-		} else if (p_lossy_quality > 0.75) {
-			squish_comp = squish::kColourClusterFit;
-		}
-
-		Image::Format target_format = Image::FORMAT_RGBA8;
-
-		p_image->convert(Image::FORMAT_RGBA8); //still uses RGBA to convert
-
-		switch (p_channels) {
-			case Image::USED_CHANNELS_L: {
-				target_format = Image::FORMAT_DXT1;
-				squish_comp |= squish::kDxt1;
-			} break;
-			case Image::USED_CHANNELS_LA: {
-				target_format = Image::FORMAT_DXT5;
-				squish_comp |= squish::kDxt5;
-			} break;
-			case Image::USED_CHANNELS_R: {
-				target_format = Image::FORMAT_RGTC_R;
-				squish_comp |= squish::kBc4;
-			} break;
-			case Image::USED_CHANNELS_RG: {
-				target_format = Image::FORMAT_RGTC_RG;
-				squish_comp |= squish::kBc5;
-			} break;
-			case Image::USED_CHANNELS_RGB: {
-				target_format = Image::FORMAT_DXT1;
-				squish_comp |= squish::kDxt1;
-			} break;
-			case Image::USED_CHANNELS_RGBA: {
-				//TODO, should convert both, then measure which one does a better job
-				target_format = Image::FORMAT_DXT5;
-				squish_comp |= squish::kDxt5;
-
-			} break;
-			default: {
-				ERR_PRINT("Unknown image format, defaulting to RGBA8");
-				break;
-			}
-		}
-
-		Vector<uint8_t> data;
-		int target_size = Image::get_image_data_size(w, h, target_format, p_image->has_mipmaps());
-		int mm_count = p_image->has_mipmaps() ? Image::get_image_required_mipmaps(w, h, target_format) : 0;
-		data.resize(target_size);
-		int shift = Image::get_format_pixel_rshift(target_format);
-
-		const uint8_t *rb = p_image->get_data().ptr();
-		uint8_t *wb = data.ptrw();
-
-		int dst_ofs = 0;
-
-		for (int i = 0; i <= mm_count; i++) {
-			int bw = w % 4 != 0 ? w + (4 - w % 4) : w;
-			int bh = h % 4 != 0 ? h + (4 - h % 4) : h;
-
-			int src_ofs = p_image->get_mipmap_offset(i);
-			squish::CompressImage(&rb[src_ofs], w, h, &wb[dst_ofs], squish_comp);
-			dst_ofs += (MAX(4, bw) * MAX(4, bh)) >> shift;
-			w = MAX(w / 2, 1);
-			h = MAX(h / 2, 1);
-		}
-
-		p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
-	}
-}
diff --git a/modules/etc/image_compress_etc.h b/modules/squish/image_decompress_squish.h
index 44a06194e9..fff5839ac4 100644
--- a/modules/etc/image_compress_etc.h
+++ b/modules/squish/image_decompress_squish.h
@@ -1,5 +1,5 @@
 /*************************************************************************/
-/*  image_compress_etc.h                                                 */
+/*  image_decompress_squish.h                                            */
 /*************************************************************************/
 /*                       This file is part of:                           */
 /*                           GODOT ENGINE                                */
@@ -28,9 +28,11 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#ifndef IMAGE_COMPRESS_ETC_H
-#define IMAGE_COMPRESS_ETC_H
+#ifndef IMAGE_DECOMPRESS_SQUISH_H
+#define IMAGE_DECOMPRESS_SQUISH_H
 
-void _register_etc_compress_func();
+#include "core/io/image.h"
 
-#endif // IMAGE_COMPRESS_ETC_H
+void image_decompress_squish(Image *p_image);
+
+#endif // IMAGE_DECOMPRESS_SQUISH_H
diff --git a/modules/squish/register_types.cpp b/modules/squish/register_types.cpp
index 451e9d8e93..51aab040e7 100644
--- a/modules/squish/register_types.cpp
+++ b/modules/squish/register_types.cpp
@@ -29,10 +29,10 @@
 /*************************************************************************/
 
 #include "register_types.h"
-#include "image_compress_squish.h"
+
+#include "image_decompress_squish.h"
 
 void register_squish_types() {
-	Image::set_compress_bc_func(image_compress_squish);
 	Image::_image_decompress_bc = image_decompress_squish;
 }
 
diff --git a/modules/theora/video_stream_theora.cpp b/modules/theora/video_stream_theora.cpp
index c5f6dc0d99..19f26c87cd 100644
--- a/modules/theora/video_stream_theora.cpp
+++ b/modules/theora/video_stream_theora.cpp
@@ -603,6 +603,7 @@ float VideoStreamPlaybackTheora::get_playback_position() const {
 };
 
 void VideoStreamPlaybackTheora::seek(float p_time) {
+	WARN_PRINT_ONCE("Seeking in Theora and WebM videos is not implemented yet (it's only supported for GDNative-provided video streams).");
 }
 
 void VideoStreamPlaybackTheora::set_mix_callback(AudioMixCallback p_callback, void *p_userdata) {
diff --git a/modules/visual_script/visual_script_editor.cpp b/modules/visual_script/visual_script_editor.cpp
index 3cdf60708b..02ec9ccd06 100644
--- a/modules/visual_script/visual_script_editor.cpp
+++ b/modules/visual_script/visual_script_editor.cpp
@@ -710,7 +710,7 @@ void VisualScriptEditor::_update_graph(int p_only_id) {
 			has_gnode_text = true;
 			LineEdit *line_edit = memnew(LineEdit);
 			line_edit->set_text(node->get_text());
-			line_edit->set_expand_to_text_length(true);
+			line_edit->set_expand_to_text_length_enabled(true);
 			line_edit->add_theme_font_override("font", get_theme_font("source", "EditorFonts"));
 			gnode->add_child(line_edit);
 			line_edit->connect("text_changed", callable_mp(this, &VisualScriptEditor::_expression_text_changed), varray(E->get()));
@@ -843,7 +843,7 @@ void VisualScriptEditor::_update_graph(int p_only_id) {
 						hbc->add_child(name_box);
 						name_box->set_custom_minimum_size(Size2(60 * EDSCALE, 0));
 						name_box->set_text(left_name);
-						name_box->set_expand_to_text_length(true);
+						name_box->set_expand_to_text_length_enabled(true);
 						name_box->connect("resized", callable_mp(this, &VisualScriptEditor::_update_node_size), varray(E->get()));
 						name_box->connect("focus_exited", callable_mp(this, &VisualScriptEditor::_port_name_focus_out), varray(name_box, E->get(), i, true));
 					} else {
@@ -938,7 +938,7 @@ void VisualScriptEditor::_update_graph(int p_only_id) {
 						hbc->add_child(name_box);
 						name_box->set_custom_minimum_size(Size2(60 * EDSCALE, 0));
 						name_box->set_text(right_name);
-						name_box->set_expand_to_text_length(true);
+						name_box->set_expand_to_text_length_enabled(true);
 						name_box->connect("resized", callable_mp(this, &VisualScriptEditor::_update_node_size), varray(E->get()));
 						name_box->connect("focus_exited", callable_mp(this, &VisualScriptEditor::_port_name_focus_out), varray(name_box, E->get(), i, false));
 					} else {
@@ -4322,7 +4322,7 @@ VisualScriptEditor::VisualScriptEditor() {
 	function_name_box = memnew(LineEdit);
 	function_name_edit->add_child(function_name_box);
 	function_name_box->connect("gui_input", callable_mp(this, &VisualScriptEditor::_fn_name_box_input));
-	function_name_box->set_expand_to_text_length(true);
+	function_name_box->set_expand_to_text_length_enabled(true);
 	add_child(function_name_edit);
 
 	///       Actual Graph          ///
diff --git a/modules/webm/video_stream_webm.cpp b/modules/webm/video_stream_webm.cpp
index 101001cba0..a6b64b342e 100644
--- a/modules/webm/video_stream_webm.cpp
+++ b/modules/webm/video_stream_webm.cpp
@@ -194,7 +194,7 @@ float VideoStreamPlaybackWebm::get_playback_position() const {
 }
 
 void VideoStreamPlaybackWebm::seek(float p_time) {
-	//Not implemented
+	WARN_PRINT_ONCE("Seeking in Theora and WebM videos is not implemented yet (it's only supported for GDNative-provided video streams).");
 }
 
 void VideoStreamPlaybackWebm::set_audio_track(int p_idx) {
diff --git a/platform/android/export/export.cpp b/platform/android/export/export.cpp
index 5f9f420b59..f3502b2220 100644
--- a/platform/android/export/export.cpp
+++ b/platform/android/export/export.cpp
@@ -201,8 +201,10 @@ static const char *android_perms[] = {
 	nullptr
 };
 
-static const char *SPLASH_IMAGE_EXPORT_PATH = "res/drawable/splash.png";
-static const char *SPLASH_BG_COLOR_PATH = "res/drawable/splash_bg_color.png";
+static const char *SPLASH_IMAGE_EXPORT_PATH = "res/drawable-nodpi/splash.png";
+static const char *LEGACY_BUILD_SPLASH_IMAGE_EXPORT_PATH = "res/drawable-nodpi-v4/splash.png";
+static const char *SPLASH_BG_COLOR_PATH = "res/drawable-nodpi/splash_bg_color.png";
+static const char *LEGACY_BUILD_SPLASH_BG_COLOR_PATH = "res/drawable-nodpi-v4/splash_bg_color.png";
 static const char *SPLASH_CONFIG_PATH = "res://android/build/res/drawable/splash_drawable.xml";
 
 const String SPLASH_CONFIG_XML_CONTENT = R"SPLASH(<?xml version="1.0" encoding="utf-8"?>
@@ -210,7 +212,7 @@ const String SPLASH_CONFIG_XML_CONTENT = R"SPLASH(<?xml version="1.0" encoding="
 	<item android:drawable="@drawable/splash_bg_color" />
 	<item>
 		<bitmap
-				android:gravity="%s"
+				android:gravity="center"
 				android:filter="%s"
 				android:src="@drawable/splash" />
 	</item>
@@ -1502,6 +1504,21 @@ class EditorExportPlatformAndroid : public EditorExportPlatform {
 			splash_image = Ref<Image>(memnew(Image(boot_splash_png)));
 		}
 
+		if (scale_splash) {
+			Size2 screen_size = Size2(ProjectSettings::get_singleton()->get("display/window/size/width"), ProjectSettings::get_singleton()->get("display/window/size/height"));
+			int width, height;
+			if (screen_size.width > screen_size.height) {
+				// Window is wider than tall: match its height and derive the width from the splash's aspect ratio.
+				height = screen_size.height;
+				width = splash_image->get_width() * screen_size.height / splash_image->get_height();
+			} else {
+				// Otherwise: match the window width and derive the height from the splash's aspect ratio.
+				width = screen_size.width;
+				height = splash_image->get_height() * screen_size.width / splash_image->get_width();
+			}
+			splash_image->resize(width, height);
+		}
+
 		// Setup the splash bg color
 		bool bg_color_valid;
 		Color bg_color = ProjectSettings::get_singleton()->get("application/boot_splash/bg_color", &bg_color_valid);
@@ -1514,8 +1531,7 @@ class EditorExportPlatformAndroid : public EditorExportPlatform {
 		splash_bg_color_image->create(splash_image->get_width(), splash_image->get_height(), false, splash_image->get_format());
 		splash_bg_color_image->fill(bg_color);
 
-		String gravity = scale_splash ? "fill" : "center";
-		String processed_splash_config_xml = vformat(SPLASH_CONFIG_XML_CONTENT, gravity, bool_to_string(apply_filter));
+		String processed_splash_config_xml = vformat(SPLASH_CONFIG_XML_CONTENT, bool_to_string(apply_filter));
 		return processed_splash_config_xml;
 	}
 
@@ -1803,7 +1819,7 @@ public:
 			p_debug_flags |= DEBUG_FLAG_REMOTE_DEBUG_LOCALHOST;
 		}
 
-		String tmp_export_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmpexport.apk");
+		String tmp_export_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmpexport." + uitos(OS::get_singleton()->get_unix_time()) + ".apk");
 
 #define CLEANUP_AND_RETURN(m_err)                         \
 	{                                                     \
@@ -1820,6 +1836,7 @@ public:
 
 		List<String> args;
 		int rv;
+		String output;
 
 		bool remove_prev = p_preset->get("one_click_deploy/clear_previous_install");
 		String version_name = p_preset->get("version/name");
@@ -1837,7 +1854,9 @@ public:
 			args.push_back("uninstall");
 			args.push_back(get_package_name(package_name));
 
-			err = OS::get_singleton()->execute(adb, args, nullptr, &rv);
+			output.clear();
+			err = OS::get_singleton()->execute(adb, args, &output, &rv, true);
+			print_verbose(output);
 		}
 
 		print_line("Installing to device (please wait...): " + devices[p_device].name);
@@ -1852,7 +1871,9 @@ public:
 		args.push_back("-r");
 		args.push_back(tmp_export_path);
 
-		err = OS::get_singleton()->execute(adb, args, nullptr, &rv);
+		output.clear();
+		err = OS::get_singleton()->execute(adb, args, &output, &rv, true);
+		print_verbose(output);
 		if (err || rv != 0) {
 			EditorNode::add_io_error("Could not install to device.");
 			CLEANUP_AND_RETURN(ERR_CANT_CREATE);
@@ -1869,7 +1890,9 @@ public:
 				args.push_back(devices[p_device].id);
 				args.push_back("reverse");
 				args.push_back("--remove-all");
-				OS::get_singleton()->execute(adb, args, nullptr, &rv);
+				output.clear();
+				OS::get_singleton()->execute(adb, args, &output, &rv, true);
+				print_verbose(output);
 
 				if (p_debug_flags & DEBUG_FLAG_REMOTE_DEBUG) {
 					int dbg_port = EditorSettings::get_singleton()->get("network/debug/remote_port");
@@ -1880,7 +1903,9 @@ public:
 					args.push_back("tcp:" + itos(dbg_port));
 					args.push_back("tcp:" + itos(dbg_port));
 
-					OS::get_singleton()->execute(adb, args, nullptr, &rv);
+					output.clear();
+					OS::get_singleton()->execute(adb, args, &output, &rv, true);
+					print_verbose(output);
 					print_line("Reverse result: " + itos(rv));
 				}
 
@@ -1894,7 +1919,9 @@ public:
 					args.push_back("tcp:" + itos(fs_port));
 					args.push_back("tcp:" + itos(fs_port));
 
-					err = OS::get_singleton()->execute(adb, args, nullptr, &rv);
+					output.clear();
+					err = OS::get_singleton()->execute(adb, args, &output, &rv, true);
+					print_verbose(output);
 					print_line("Reverse result2: " + itos(rv));
 				}
 			} else {
@@ -1922,7 +1949,9 @@ public:
 		args.push_back("-n");
 		args.push_back(get_package_name(package_name) + "/com.godot.game.GodotApp");
 
-		err = OS::get_singleton()->execute(adb, args, nullptr, &rv);
+		output.clear();
+		err = OS::get_singleton()->execute(adb, args, &output, &rv, true);
+		print_verbose(output);
 		if (err || rv != 0) {
 			EditorNode::add_io_error("Could not execute on device.");
 			CLEANUP_AND_RETURN(ERR_CANT_CREATE);
@@ -2319,6 +2348,7 @@ public:
 			return ERR_FILE_CANT_OPEN;
 		}
 
+		String output;
 		List<String> args;
 		args.push_back("sign");
 		args.push_back("--verbose");
@@ -2334,7 +2364,9 @@ public:
 			print_verbose("Signing debug binary using: " + String("\n") + apksigner + " " + join_list(args, String(" ")));
 		}
 		int retval;
-		OS::get_singleton()->execute(apksigner, args, nullptr, &retval);
+		output.clear();
+		OS::get_singleton()->execute(apksigner, args, &output, &retval, true);
+		print_verbose(output);
 		if (retval) {
 			EditorNode::add_io_error("'apksigner' returned with error #" + itos(retval));
 			return ERR_CANT_CREATE;
@@ -2352,7 +2384,9 @@ public:
 			print_verbose("Verifying signed build using: " + String("\n") + apksigner + " " + join_list(args, String(" ")));
 		}
 
-		OS::get_singleton()->execute(apksigner, args, nullptr, &retval);
+		output.clear();
+		OS::get_singleton()->execute(apksigner, args, &output, &retval, true);
+		print_verbose(output);
 		if (retval) {
 			EditorNode::add_io_error("'apksigner' verification of " + export_label + " failed.");
 			return ERR_CANT_CREATE;
@@ -2657,7 +2691,7 @@ public:
 		FileAccess *dst_f = nullptr;
 		io2.opaque = &dst_f;
 
-		String tmp_unaligned_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmpexport-unaligned.apk");
+		String tmp_unaligned_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmpexport-unaligned." + uitos(OS::get_singleton()->get_unix_time()) + ".apk");
 
 #define CLEANUP_AND_RETURN(m_err)                            \
 	{                                                        \
@@ -2702,12 +2736,12 @@ public:
 			}
 
 			// Process the splash image
-			if (file == SPLASH_IMAGE_EXPORT_PATH && splash_image.is_valid() && !splash_image->is_empty()) {
+			if ((file == SPLASH_IMAGE_EXPORT_PATH || file == LEGACY_BUILD_SPLASH_IMAGE_EXPORT_PATH) && splash_image.is_valid() && !splash_image->is_empty()) {
 				_load_image_data(splash_image, data);
 			}
 
 			// Process the splash bg color image
-			if (file == SPLASH_BG_COLOR_PATH && splash_bg_color_image.is_valid() && !splash_bg_color_image->is_empty()) {
+			if ((file == SPLASH_BG_COLOR_PATH || file == LEGACY_BUILD_SPLASH_BG_COLOR_PATH) && splash_bg_color_image.is_valid() && !splash_bg_color_image->is_empty()) {
 				_load_image_data(splash_bg_color_image, data);
 			}
 
diff --git a/platform/android/java/app/config.gradle b/platform/android/java/app/config.gradle
index c0ae4007d2..ad9a19e2af 100644
--- a/platform/android/java/app/config.gradle
+++ b/platform/android/java/app/config.gradle
@@ -239,5 +239,5 @@ ext.shouldSign = { ->
 }
 
 ext.shouldNotStrip = { ->
-    return isAndroidStudio()
+    return isAndroidStudio() || project.hasProperty("doNotStrip")
 }
diff --git a/platform/android/java/app/res/drawable/splash.png b/platform/android/java/app/res/drawable-nodpi/splash.png
index 7bddd4325a..7bddd4325a 100644
--- a/platform/android/java/app/res/drawable/splash.png
+++ b/platform/android/java/app/res/drawable-nodpi/splash.png
diff --git a/platform/android/java/app/res/drawable/splash_bg_color.png b/platform/android/java/app/res/drawable-nodpi/splash_bg_color.png
index 004b6fd508..004b6fd508 100644
--- a/platform/android/java/app/res/drawable/splash_bg_color.png
+++ b/platform/android/java/app/res/drawable-nodpi/splash_bg_color.png
diff --git a/platform/android/java/build.gradle b/platform/android/java/build.gradle
index ec02b0fc7a..81570d9d86 100644
--- a/platform/android/java/build.gradle
+++ b/platform/android/java/build.gradle
@@ -122,16 +122,17 @@ task zipCustomBuild(type: Zip) {
     destinationDir(file(binDir))
 }
 
-/**
- * Master task used to coordinate the tasks defined above to generate the set of Godot templates.
- */
-task generateGodotTemplates(type: GradleBuild) {
+def templateExcludedBuildTask() {
     // We exclude these gradle tasks so we can run the scons command manually.
+    def excludedTasks = []
     for (String buildType : supportedTargets) {
-        startParameter.excludedTaskNames += ":lib:" + getSconsTaskName(buildType)
+        excludedTasks += ":lib:" + getSconsTaskName(buildType)
     }
+    return excludedTasks
+}
 
-    tasks = []
+def templateBuildTasks() {
+    def tasks = []
 
     // Only build the apks and aar files for which we have native shared libraries.
     for (String target : supportedTargets) {
@@ -152,6 +153,29 @@ task generateGodotTemplates(type: GradleBuild) {
         }
     }
 
+    return tasks
+}
+
+/**
+ * Master task used to coordinate the tasks defined above to generate the set of Godot templates.
+ */
+task generateGodotTemplates(type: GradleBuild) {
+    startParameter.excludedTaskNames = templateExcludedBuildTask()
+    tasks = templateBuildTasks()
+    // zipCustomBuild (defined above) is registered as this task's finalizer.
+    finalizedBy 'zipCustomBuild'
+}
+
+/**
+ * Generates the same output as generateGodotTemplates, but with the `doNotStrip` property set to keep dev symbols.
+ */
+task generateDevTemplate (type: GradleBuild) {
+    // Set the `doNotStrip` project property; shouldNotStrip() in app/config.gradle returns true when it is present.
+    startParameter.projectProperties += [doNotStrip: true]
+
+    startParameter.excludedTaskNames = templateExcludedBuildTask()
+    tasks = templateBuildTasks()
+
     finalizedBy 'zipCustomBuild'
 }
 
diff --git a/platform/android/java/gradle.properties b/platform/android/java/gradle.properties
index 2dc069ad2f..6b3b62a9da 100644
--- a/platform/android/java/gradle.properties
+++ b/platform/android/java/gradle.properties
@@ -12,7 +12,7 @@ android.useAndroidX=true
 
 # Specifies the JVM arguments used for the daemon process.
 # The setting is particularly useful for tweaking memory settings.
-org.gradle.jvmargs=-Xmx1536m
+org.gradle.jvmargs=-Xmx4536m
 
 # When configured, Gradle will run in incubating parallel mode.
 # This option should only be used with decoupled projects. More details, visit
diff --git a/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java b/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java
index 4eac2d08d1..1ed16e04ca 100644
--- a/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java
+++ b/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java
@@ -34,6 +34,7 @@ import android.content.Intent;
 import android.os.Bundle;
 import android.view.KeyEvent;
 
+import androidx.annotation.CallSuper;
 import androidx.annotation.NonNull;
 import androidx.annotation.Nullable;
 import androidx.fragment.app.FragmentActivity;
@@ -68,6 +69,24 @@ public abstract class FullScreenGodotApp extends FragmentActivity implements God
 		}
 	}
 
+	@CallSuper // overriding subclasses must call through, otherwise the fragment never sees the result
+	@Override
+	public void onActivityResult(int requestCode, int resultCode, Intent data) {
+		super.onActivityResult(requestCode, resultCode, data);
+		if (godotFragment != null) { // relay the activity result to the hosted Godot fragment, when one is set
+			godotFragment.onActivityResult(requestCode, resultCode, data);
+		}
+	}
+
+	@CallSuper // overriding subclasses must call through, otherwise the fragment never sees the result
+	@Override
+	public void onRequestPermissionsResult(int requestCode, String[] permissions, int[] grantResults) {
+		super.onRequestPermissionsResult(requestCode, permissions, grantResults);
+		if (godotFragment != null) { // relay the permission result to the hosted Godot fragment, when one is set
+			godotFragment.onRequestPermissionsResult(requestCode, permissions, grantResults);
+		}
+	}
+
 	@Override
 	public void onBackPressed() {
 		if (godotFragment != null) {
diff --git a/platform/iphone/plugin/godot_plugin_config.h b/platform/iphone/plugin/godot_plugin_config.h
index f4e30c8349..e2546e733c 100644
--- a/platform/iphone/plugin/godot_plugin_config.h
+++ b/platform/iphone/plugin/godot_plugin_config.h
@@ -218,8 +218,9 @@ static inline uint64_t get_plugin_modification_time(const PluginConfigIOS &plugi
 	} else {
 		String file_path = plugin_config.binary.get_base_dir();
 		String file_name = plugin_config.binary.get_basename().get_file();
-		String release_file_name = file_path.plus_file(file_name + ".release.a");
-		String debug_file_name = file_path.plus_file(file_name + ".debug.a");
+		String plugin_extension = plugin_config.binary.get_extension();
+		String release_file_name = file_path.plus_file(file_name + ".release." + plugin_extension);
+		String debug_file_name = file_path.plus_file(file_name + ".debug." + plugin_extension);
 
 		last_updated = MAX(last_updated, FileAccess::get_modified_time(release_file_name));
 		last_updated = MAX(last_updated, FileAccess::get_modified_time(debug_file_name));
diff --git a/platform/linuxbsd/detect.py b/platform/linuxbsd/detect.py
index 6b527c6fb5..646ae4d457 100644
--- a/platform/linuxbsd/detect.py
+++ b/platform/linuxbsd/detect.py
@@ -67,10 +67,10 @@ def get_opts():
         BoolVariable("use_static_cpp", "Link libgcc and libstdc++ statically for better portability", True),
         BoolVariable("use_coverage", "Test Godot coverage", False),
         BoolVariable("use_ubsan", "Use LLVM/GCC compiler undefined behavior sanitizer (UBSAN)", False),
-        BoolVariable("use_asan", "Use LLVM/GCC compiler address sanitizer (ASAN))", False),
-        BoolVariable("use_lsan", "Use LLVM/GCC compiler leak sanitizer (LSAN))", False),
-        BoolVariable("use_tsan", "Use LLVM/GCC compiler thread sanitizer (TSAN))", False),
-        BoolVariable("use_msan", "Use LLVM/GCC compiler memory sanitizer (MSAN))", False),
+        BoolVariable("use_asan", "Use LLVM/GCC compiler address sanitizer (ASAN)", False),
+        BoolVariable("use_lsan", "Use LLVM/GCC compiler leak sanitizer (LSAN)", False),
+        BoolVariable("use_tsan", "Use LLVM/GCC compiler thread sanitizer (TSAN)", False),
+        BoolVariable("use_msan", "Use LLVM compiler memory sanitizer (MSAN)", False),
         BoolVariable("pulseaudio", "Detect and use PulseAudio", True),
         BoolVariable("udev", "Use udev for gamepad connection callbacks", True),
         BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True),
@@ -147,11 +147,23 @@ def configure(env):
         env.extra_suffix += "s"
 
         if env["use_ubsan"]:
-            env.Append(CCFLAGS=["-fsanitize=undefined"])
+            env.Append(
+                CCFLAGS=[
+                    "-fsanitize=undefined,shift,shift-exponent,integer-divide-by-zero,unreachable,vla-bound,null,return,signed-integer-overflow,bounds,float-divide-by-zero,float-cast-overflow,nonnull-attribute,returns-nonnull-attribute,bool,enum,vptr,pointer-overflow,builtin"
+                ]
+            )
             env.Append(LINKFLAGS=["-fsanitize=undefined"])
+            if env["use_llvm"]:
+                env.Append(
+                    CCFLAGS=[
+                        "-fsanitize=nullability-return,nullability-arg,function,nullability-assign,implicit-integer-sign-change"
+                    ]
+                )
+            else:
+                env.Append(CCFLAGS=["-fsanitize=bounds-strict"])
 
         if env["use_asan"]:
-            env.Append(CCFLAGS=["-fsanitize=address"])
+            env.Append(CCFLAGS=["-fsanitize=address,pointer-subtract,pointer-compare"])
             env.Append(LINKFLAGS=["-fsanitize=address"])
 
         if env["use_lsan"]:
@@ -162,8 +174,10 @@ def configure(env):
             env.Append(CCFLAGS=["-fsanitize=thread"])
             env.Append(LINKFLAGS=["-fsanitize=thread"])
 
-        if env["use_msan"]:
+        if env["use_msan"] and env["use_llvm"]:
             env.Append(CCFLAGS=["-fsanitize=memory"])
+            env.Append(CCFLAGS=["-fsanitize-memory-track-origins"])
+            env.Append(CCFLAGS=["-fsanitize-recover=memory"])
             env.Append(LINKFLAGS=["-fsanitize=memory"])
 
     if env["use_lto"]:
diff --git a/platform/osx/detect.py b/platform/osx/detect.py
index 5b320da82f..317e79d0ea 100644
--- a/platform/osx/detect.py
+++ b/platform/osx/detect.py
@@ -34,9 +34,8 @@ def get_opts():
         BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True),
         BoolVariable("separate_debug_symbols", "Create a separate file containing debugging symbols", False),
         BoolVariable("use_ubsan", "Use LLVM/GCC compiler undefined behavior sanitizer (UBSAN)", False),
-        BoolVariable("use_asan", "Use LLVM/GCC compiler address sanitizer (ASAN))", False),
-        BoolVariable("use_lsan", "Use LLVM/GCC compiler leak sanitizer (LSAN))", False),
-        BoolVariable("use_tsan", "Use LLVM/GCC compiler thread sanitizer (TSAN))", False),
+        BoolVariable("use_asan", "Use LLVM/GCC compiler address sanitizer (ASAN)", False),
+        BoolVariable("use_tsan", "Use LLVM/GCC compiler thread sanitizer (TSAN)", False),
     ]
 
 
@@ -132,21 +131,22 @@ def configure(env):
         env["AS"] = basecmd + "as"
         env.Append(CPPDEFINES=["__MACPORTS__"])  # hack to fix libvpx MM256_BROADCASTSI128_SI256 define
 
-    if env["use_ubsan"] or env["use_asan"] or env["use_lsan"] or env["use_tsan"]:
+    if env["use_ubsan"] or env["use_asan"] or env["use_tsan"]:
         env.extra_suffix += "s"
 
         if env["use_ubsan"]:
-            env.Append(CCFLAGS=["-fsanitize=undefined"])
+            env.Append(
+                CCFLAGS=[
+                    "-fsanitize=undefined,shift,shift-exponent,integer-divide-by-zero,unreachable,vla-bound,null,return,signed-integer-overflow,bounds,float-divide-by-zero,float-cast-overflow,nonnull-attribute,returns-nonnull-attribute,bool,enum,vptr,pointer-overflow,builtin"
+                ]
+            )
             env.Append(LINKFLAGS=["-fsanitize=undefined"])
+            env.Append(CCFLAGS=["-fsanitize=nullability-return,nullability-arg,function,nullability-assign"])
 
         if env["use_asan"]:
-            env.Append(CCFLAGS=["-fsanitize=address"])
+            env.Append(CCFLAGS=["-fsanitize=address,pointer-subtract,pointer-compare"])
             env.Append(LINKFLAGS=["-fsanitize=address"])
 
-        if env["use_lsan"]:
-            env.Append(CCFLAGS=["-fsanitize=leak"])
-            env.Append(LINKFLAGS=["-fsanitize=leak"])
-
         if env["use_tsan"]:
             env.Append(CCFLAGS=["-fsanitize=thread"])
             env.Append(LINKFLAGS=["-fsanitize=thread"])
diff --git a/platform/server/detect.py b/platform/server/detect.py
index 16ddbe1768..478bcad212 100644
--- a/platform/server/detect.py
+++ b/platform/server/detect.py
@@ -35,11 +35,11 @@ def get_opts():
         BoolVariable("use_static_cpp", "Link libgcc and libstdc++ statically for better portability", True),
         BoolVariable("use_coverage", "Test Godot coverage", False),
         BoolVariable("use_ubsan", "Use LLVM/GCC compiler undefined behavior sanitizer (UBSAN)", False),
-        BoolVariable("use_asan", "Use LLVM/GCC compiler address sanitizer (ASAN))", False),
-        BoolVariable("use_lsan", "Use LLVM/GCC compiler leak sanitizer (LSAN))", False),
-        BoolVariable("use_tsan", "Use LLVM/GCC compiler thread sanitizer (TSAN))", False),
+        BoolVariable("use_asan", "Use LLVM/GCC compiler address sanitizer (ASAN)", False),
+        BoolVariable("use_lsan", "Use LLVM/GCC compiler leak sanitizer (LSAN)", False),
+        BoolVariable("use_tsan", "Use LLVM/GCC compiler thread sanitizer (TSAN)", False),
+        BoolVariable("use_msan", "Use LLVM compiler memory sanitizer (MSAN)", False),
         BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True),
-        BoolVariable("use_msan", "Use LLVM/GCC compiler memory sanitizer (MSAN))", False),
         BoolVariable("separate_debug_symbols", "Create a separate file containing debugging symbols", False),
         BoolVariable("execinfo", "Use libexecinfo on systems where glibc is not available", False),
     ]
@@ -104,11 +104,23 @@ def configure(env):
         env.extra_suffix += "s"
 
         if env["use_ubsan"]:
-            env.Append(CCFLAGS=["-fsanitize=undefined"])
+            env.Append(
+                CCFLAGS=[
+                    "-fsanitize=undefined,shift,shift-exponent,integer-divide-by-zero,unreachable,vla-bound,null,return,signed-integer-overflow,bounds,float-divide-by-zero,float-cast-overflow,nonnull-attribute,returns-nonnull-attribute,bool,enum,vptr,pointer-overflow,builtin"
+                ]
+            )
             env.Append(LINKFLAGS=["-fsanitize=undefined"])
+            if env["use_llvm"]:
+                env.Append(
+                    CCFLAGS=[
+                        "-fsanitize=nullability-return,nullability-arg,function,nullability-assign,implicit-integer-sign-change"
+                    ]
+                )
+            else:
+                env.Append(CCFLAGS=["-fsanitize=bounds-strict"])
 
         if env["use_asan"]:
-            env.Append(CCFLAGS=["-fsanitize=address"])
+            env.Append(CCFLAGS=["-fsanitize=address,pointer-subtract,pointer-compare"])
             env.Append(LINKFLAGS=["-fsanitize=address"])
 
         if env["use_lsan"]:
@@ -119,8 +131,10 @@ def configure(env):
             env.Append(CCFLAGS=["-fsanitize=thread"])
             env.Append(LINKFLAGS=["-fsanitize=thread"])
 
-        if env["use_msan"]:
+        if env["use_msan"] and env["use_llvm"]:
             env.Append(CCFLAGS=["-fsanitize=memory"])
+            env.Append(CCFLAGS=["-fsanitize-memory-track-origins"])
+            env.Append(CCFLAGS=["-fsanitize-recover=memory"])
             env.Append(LINKFLAGS=["-fsanitize=memory"])
 
     if env["use_lto"]:
diff --git a/platform/uwp/export/export.cpp b/platform/uwp/export/export.cpp
index 2a0bc78440..800a728033 100644
--- a/platform/uwp/export/export.cpp
+++ b/platform/uwp/export/export.cpp
@@ -1177,6 +1177,8 @@ public:
 	}
 
 	virtual Error export_project(const Ref<EditorExportPreset> &p_preset, bool p_debug, const String &p_path, int p_flags = 0) override {
+		ExportNotifier notifier(*this, p_preset, p_debug, p_path, p_flags);
+
 		String src_appx;
 
 		EditorProgress ep("export", "Exporting for UWP", 7, true);
diff --git a/scene/2d/area_2d.cpp b/scene/2d/area_2d.cpp
index 49d1654e3f..96b4abe1ba 100644
--- a/scene/2d/area_2d.cpp
+++ b/scene/2d/area_2d.cpp
@@ -600,6 +600,11 @@ void Area2D::_bind_methods() {
 	ADD_SIGNAL(MethodInfo("area_entered", PropertyInfo(Variant::OBJECT, "area", PROPERTY_HINT_RESOURCE_TYPE, "Area2D")));
 	ADD_SIGNAL(MethodInfo("area_exited", PropertyInfo(Variant::OBJECT, "area", PROPERTY_HINT_RESOURCE_TYPE, "Area2D")));
 
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "monitoring"), "set_monitoring", "is_monitoring");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "monitorable"), "set_monitorable", "is_monitorable");
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "priority", PROPERTY_HINT_RANGE, "0,128,1"), "set_priority", "get_priority");
+
+	ADD_GROUP("Physics Overrides", "");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "space_override", PROPERTY_HINT_ENUM, "Disabled,Combine,Combine-Replace,Replace,Replace-Combine"), "set_space_override_mode", "get_space_override_mode");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "gravity_point"), "set_gravity_is_point", "is_gravity_a_point");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "gravity_distance_scale", PROPERTY_HINT_EXP_RANGE, "0,1024,0.001,or_greater"), "set_gravity_distance_scale", "get_gravity_distance_scale");
@@ -607,9 +612,7 @@ void Area2D::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "gravity", PROPERTY_HINT_RANGE, "-1024,1024,0.001"), "set_gravity", "get_gravity");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "linear_damp", PROPERTY_HINT_RANGE, "0,100,0.001,or_greater"), "set_linear_damp", "get_linear_damp");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "angular_damp", PROPERTY_HINT_RANGE, "0,100,0.001,or_greater"), "set_angular_damp", "get_angular_damp");
-	ADD_PROPERTY(PropertyInfo(Variant::INT, "priority", PROPERTY_HINT_RANGE, "0,128,1"), "set_priority", "get_priority");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "monitoring"), "set_monitoring", "is_monitoring");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "monitorable"), "set_monitorable", "is_monitorable");
+
 	ADD_GROUP("Collision", "collision_");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "collision_layer", PROPERTY_HINT_LAYERS_2D_PHYSICS), "set_collision_layer", "get_collision_layer");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "collision_mask", PROPERTY_HINT_LAYERS_2D_PHYSICS), "set_collision_mask", "get_collision_mask");
diff --git a/scene/3d/area_3d.cpp b/scene/3d/area_3d.cpp
index 749cf4ff9d..7e57ef90da 100644
--- a/scene/3d/area_3d.cpp
+++ b/scene/3d/area_3d.cpp
@@ -650,6 +650,11 @@ void Area3D::_bind_methods() {
 	ADD_SIGNAL(MethodInfo("area_entered", PropertyInfo(Variant::OBJECT, "area", PROPERTY_HINT_RESOURCE_TYPE, "Area3D")));
 	ADD_SIGNAL(MethodInfo("area_exited", PropertyInfo(Variant::OBJECT, "area", PROPERTY_HINT_RESOURCE_TYPE, "Area3D")));
 
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "monitoring"), "set_monitoring", "is_monitoring");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "monitorable"), "set_monitorable", "is_monitorable");
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "priority", PROPERTY_HINT_RANGE, "0,128,1"), "set_priority", "get_priority");
+
+	ADD_GROUP("Physics Overrides", "");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "space_override", PROPERTY_HINT_ENUM, "Disabled,Combine,Combine-Replace,Replace,Replace-Combine"), "set_space_override_mode", "get_space_override_mode");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "gravity_point"), "set_gravity_is_point", "is_gravity_a_point");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "gravity_distance_scale", PROPERTY_HINT_EXP_RANGE, "0,1024,0.001,or_greater"), "set_gravity_distance_scale", "get_gravity_distance_scale");
@@ -657,15 +662,15 @@ void Area3D::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "gravity", PROPERTY_HINT_RANGE, "-1024,1024,0.01"), "set_gravity", "get_gravity");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "linear_damp", PROPERTY_HINT_RANGE, "0,100,0.001,or_greater"), "set_linear_damp", "get_linear_damp");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "angular_damp", PROPERTY_HINT_RANGE, "0,100,0.001,or_greater"), "set_angular_damp", "get_angular_damp");
-	ADD_PROPERTY(PropertyInfo(Variant::INT, "priority", PROPERTY_HINT_RANGE, "0,128,1"), "set_priority", "get_priority");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "monitoring"), "set_monitoring", "is_monitoring");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "monitorable"), "set_monitorable", "is_monitorable");
+
 	ADD_GROUP("Collision", "collision_");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "collision_layer", PROPERTY_HINT_LAYERS_3D_PHYSICS), "set_collision_layer", "get_collision_layer");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "collision_mask", PROPERTY_HINT_LAYERS_3D_PHYSICS), "set_collision_mask", "get_collision_mask");
+
 	ADD_GROUP("Audio Bus", "audio_bus_");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "audio_bus_override"), "set_audio_bus_override", "is_overriding_audio_bus");
 	ADD_PROPERTY(PropertyInfo(Variant::STRING_NAME, "audio_bus_name", PROPERTY_HINT_ENUM, ""), "set_audio_bus_name", "get_audio_bus_name");
+
 	ADD_GROUP("Reverb Bus", "reverb_bus_");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "reverb_bus_enable"), "set_use_reverb_bus", "is_using_reverb_bus");
 	ADD_PROPERTY(PropertyInfo(Variant::STRING_NAME, "reverb_bus_name", PROPERTY_HINT_ENUM, ""), "set_reverb_bus", "get_reverb_bus");
diff --git a/scene/3d/baked_lightmap.cpp b/scene/3d/baked_lightmap.cpp
index 95ffbe48c1..2e1b77dfe5 100644
--- a/scene/3d/baked_lightmap.cpp
+++ b/scene/3d/baked_lightmap.cpp
@@ -259,7 +259,7 @@ void BakedLightmap::_find_meshes_and_lights(Node *p_at_node, Vector<MeshesFound>
 					if (all_override.is_valid()) {
 						mf.overrides.push_back(all_override);
 					} else {
-						mf.overrides.push_back(mi->get_surface_material(i));
+						mf.overrides.push_back(mi->get_surface_override_material(i));
 					}
 				}
 
diff --git a/scene/3d/collision_object_3d.cpp b/scene/3d/collision_object_3d.cpp
index 352a793987..261ff5db55 100644
--- a/scene/3d/collision_object_3d.cpp
+++ b/scene/3d/collision_object_3d.cpp
@@ -75,6 +75,11 @@ void CollisionObject3D::_notification(int p_what) {
 			}
 
 		} break;
+		case NOTIFICATION_PREDELETE: {
+			if (debug_shape_count > 0) {
+				_clear_debug_shapes();
+			}
+		} break;
 	}
 }
 
@@ -116,11 +121,13 @@ void CollisionObject3D::_update_debug_shapes() {
 	for (Set<uint32_t>::Element *shapedata_idx = debug_shapes_to_update.front(); shapedata_idx; shapedata_idx = shapedata_idx->next()) {
 		if (shapes.has(shapedata_idx->get())) {
 			ShapeData &shapedata = shapes[shapedata_idx->get()];
+			ShapeData::ShapeBase *shapes = shapedata.shapes.ptrw();
 			for (int i = 0; i < shapedata.shapes.size(); i++) {
-				ShapeData::ShapeBase &s = shapedata.shapes.write[i];
+				ShapeData::ShapeBase &s = shapes[i];
 				if (s.debug_shape) {
 					s.debug_shape->queue_delete();
 					s.debug_shape = nullptr;
+					--debug_shape_count;
 				}
 				if (s.shape.is_null() || shapedata.disabled) {
 					continue;
@@ -133,12 +140,30 @@ void CollisionObject3D::_update_debug_shapes() {
 				add_child(mi);
 				mi->force_update_transform();
 				s.debug_shape = mi;
+				++debug_shape_count;
 			}
 		}
 	}
 	debug_shapes_to_update.clear();
 }
 
+void CollisionObject3D::_clear_debug_shapes() {
+	// Queue every existing debug shape node for deletion and forget its pointer, for all shape owners.
+	for (Map<uint32_t, ShapeData>::Element *E = shapes.front(); E; E = E->next()) {
+		ShapeData &shapedata = E->get();
+		ShapeData::ShapeBase *shape_bases = shapedata.shapes.ptrw(); // named so it does not shadow the `shapes` member iterated above
+		for (int i = 0; i < shapedata.shapes.size(); i++) {
+			ShapeData::ShapeBase &s = shape_bases[i];
+			if (s.debug_shape) {
+				s.debug_shape->queue_delete();
+				s.debug_shape = nullptr;
+			}
+		}
+	}
+
+	debug_shape_count = 0; // all debug shapes were released above, so reset the counter in one step
+}
+
 void CollisionObject3D::_update_shape_data(uint32_t p_owner) {
 	if (is_inside_tree() && get_tree()->is_debugging_collisions_hint() && !Engine::get_singleton()->is_editor_hint()) {
 		if (debug_shapes_to_update.is_empty()) {
diff --git a/scene/3d/collision_object_3d.h b/scene/3d/collision_object_3d.h
index a2a0cbf988..e2f6cc7500 100644
--- a/scene/3d/collision_object_3d.h
+++ b/scene/3d/collision_object_3d.h
@@ -62,6 +62,7 @@ class CollisionObject3D : public Node3D {
 	bool ray_pickable = true;
 
 	Set<uint32_t> debug_shapes_to_update;
+	int debug_shape_count = 0;
 
 	void _update_pickable();
 
@@ -78,6 +79,7 @@ protected:
 	virtual void _mouse_exit();
 
 	void _update_debug_shapes();
+	void _clear_debug_shapes();
 
 public:
 	uint32_t create_shape_owner(Object *p_owner);
diff --git a/scene/3d/gi_probe.cpp b/scene/3d/gi_probe.cpp
index 0da53d0101..4d7fc29f15 100644
--- a/scene/3d/gi_probe.cpp
+++ b/scene/3d/gi_probe.cpp
@@ -343,7 +343,7 @@ void GIProbe::_find_meshes(Node *p_at_node, List<PlotMesh> &plot_meshes) {
 				pm.local_xform = xf;
 				pm.mesh = mesh;
 				for (int i = 0; i < mesh->get_surface_count(); i++) {
-					pm.instance_materials.push_back(mi->get_surface_material(i));
+					pm.instance_materials.push_back(mi->get_surface_override_material(i));
 				}
 				pm.override_material = mi->get_material_override();
 				plot_meshes.push_back(pm);
diff --git a/scene/3d/gpu_particles_collision_3d.cpp b/scene/3d/gpu_particles_collision_3d.cpp
index 97241be60f..628b823f89 100644
--- a/scene/3d/gpu_particles_collision_3d.cpp
+++ b/scene/3d/gpu_particles_collision_3d.cpp
@@ -346,7 +346,7 @@ void GPUParticlesCollisionSDF::_compute_sdf(ComputeSDFParams *params) {
 	ThreadWorkPool work_pool;
 	work_pool.init();
 	work_pool.begin_work(params->size.z, this, &GPUParticlesCollisionSDF::_compute_sdf_z, params);
-	while (work_pool.get_work_index() < (uint32_t)params->size.z) {
+	while (!work_pool.is_done_dispatching()) {
 		OS::get_singleton()->delay_usec(10000);
 		bake_step_function(work_pool.get_work_index() * 100 / params->size.z, "Baking SDF");
 	}
diff --git a/scene/3d/mesh_instance_3d.cpp b/scene/3d/mesh_instance_3d.cpp
index b997c64b29..7623ede0fc 100644
--- a/scene/3d/mesh_instance_3d.cpp
+++ b/scene/3d/mesh_instance_3d.cpp
@@ -51,13 +51,13 @@ bool MeshInstance3D::_set(const StringName &p_name, const Variant &p_value) {
 		return true;
 	}
 
-	if (p_name.operator String().begins_with("material/")) {
+	if (p_name.operator String().begins_with("surface_material_override/")) {
 		int idx = p_name.operator String().get_slicec('/', 1).to_int();
-		if (idx >= materials.size() || idx < 0) {
+		if (idx >= surface_override_materials.size() || idx < 0) {
 			return false;
 		}
 
-		set_surface_material(idx, p_value);
+		set_surface_override_material(idx, p_value);
 		return true;
 	}
 
@@ -75,12 +75,12 @@ bool MeshInstance3D::_get(const StringName &p_name, Variant &r_ret) const {
 		return true;
 	}
 
-	if (p_name.operator String().begins_with("material/")) {
+	if (p_name.operator String().begins_with("surface_material_override/")) {
 		int idx = p_name.operator String().get_slicec('/', 1).to_int();
-		if (idx >= materials.size() || idx < 0) {
+		if (idx >= surface_override_materials.size() || idx < 0) {
 			return false;
 		}
-		r_ret = materials[idx];
+		r_ret = surface_override_materials[idx];
 		return true;
 	}
 	return false;
@@ -100,7 +100,7 @@ void MeshInstance3D::_get_property_list(List<PropertyInfo> *p_list) const {
 
 	if (mesh.is_valid()) {
 		for (int i = 0; i < mesh->get_surface_count(); i++) {
-			p_list->push_back(PropertyInfo(Variant::OBJECT, "material/" + itos(i), PROPERTY_HINT_RESOURCE_TYPE, "ShaderMaterial,StandardMaterial3D", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_DEFERRED_SET_RESOURCE));
+			p_list->push_back(PropertyInfo(Variant::OBJECT, "surface_material_override/" + itos(i), PROPERTY_HINT_RESOURCE_TYPE, "ShaderMaterial,StandardMaterial3D", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_DEFERRED_SET_RESOURCE));
 		}
 	}
 }
@@ -126,7 +126,7 @@ void MeshInstance3D::set_mesh(const Ref<Mesh> &p_mesh) {
 		}
 
 		mesh->connect(CoreStringNames::get_singleton()->changed, callable_mp(this, &MeshInstance3D::_mesh_changed));
-		materials.resize(mesh->get_surface_count());
+		surface_override_materials.resize(mesh->get_surface_count());
 
 		set_base(mesh->get_rid());
 	} else {
@@ -277,26 +277,26 @@ void MeshInstance3D::_notification(int p_what) {
 	}
 }
 
-int MeshInstance3D::get_surface_material_count() const {
-	return materials.size();
+int MeshInstance3D::get_surface_override_material_count() const {
+	return surface_override_materials.size();
 }
 
-void MeshInstance3D::set_surface_material(int p_surface, const Ref<Material> &p_material) {
-	ERR_FAIL_INDEX(p_surface, materials.size());
+void MeshInstance3D::set_surface_override_material(int p_surface, const Ref<Material> &p_material) {
+	ERR_FAIL_INDEX(p_surface, surface_override_materials.size());
 
-	materials.write[p_surface] = p_material;
+	surface_override_materials.write[p_surface] = p_material;
 
-	if (materials[p_surface].is_valid()) {
-		RS::get_singleton()->instance_set_surface_material(get_instance(), p_surface, materials[p_surface]->get_rid());
+	if (surface_override_materials[p_surface].is_valid()) {
+		RS::get_singleton()->instance_set_surface_override_material(get_instance(), p_surface, surface_override_materials[p_surface]->get_rid());
 	} else {
-		RS::get_singleton()->instance_set_surface_material(get_instance(), p_surface, RID());
+		RS::get_singleton()->instance_set_surface_override_material(get_instance(), p_surface, RID());
 	}
 }
 
-Ref<Material> MeshInstance3D::get_surface_material(int p_surface) const {
-	ERR_FAIL_INDEX_V(p_surface, materials.size(), Ref<Material>());
+Ref<Material> MeshInstance3D::get_surface_override_material(int p_surface) const {
+	ERR_FAIL_INDEX_V(p_surface, surface_override_materials.size(), Ref<Material>());
 
-	return materials[p_surface];
+	return surface_override_materials[p_surface];
 }
 
 Ref<Material> MeshInstance3D::get_active_material(int p_surface) const {
@@ -305,7 +305,7 @@ Ref<Material> MeshInstance3D::get_active_material(int p_surface) const {
 		return material_override;
 	}
 
-	Ref<Material> surface_material = get_surface_material(p_surface);
+	Ref<Material> surface_material = get_surface_override_material(p_surface);
 	if (surface_material.is_valid()) {
 		return surface_material;
 	}
@@ -320,7 +320,7 @@ Ref<Material> MeshInstance3D::get_active_material(int p_surface) const {
 
 void MeshInstance3D::_mesh_changed() {
 	ERR_FAIL_COND(mesh.is_null());
-	materials.resize(mesh->get_surface_count());
+	surface_override_materials.resize(mesh->get_surface_count());
 }
 
 void MeshInstance3D::create_debug_tangents() {
@@ -408,9 +408,9 @@ void MeshInstance3D::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_skin", "skin"), &MeshInstance3D::set_skin);
 	ClassDB::bind_method(D_METHOD("get_skin"), &MeshInstance3D::get_skin);
 
-	ClassDB::bind_method(D_METHOD("get_surface_material_count"), &MeshInstance3D::get_surface_material_count);
-	ClassDB::bind_method(D_METHOD("set_surface_material", "surface", "material"), &MeshInstance3D::set_surface_material);
-	ClassDB::bind_method(D_METHOD("get_surface_material", "surface"), &MeshInstance3D::get_surface_material);
+	ClassDB::bind_method(D_METHOD("get_surface_override_material_count"), &MeshInstance3D::get_surface_override_material_count);
+	ClassDB::bind_method(D_METHOD("set_surface_override_material", "surface", "material"), &MeshInstance3D::set_surface_override_material);
+	ClassDB::bind_method(D_METHOD("get_surface_override_material", "surface"), &MeshInstance3D::get_surface_override_material);
 	ClassDB::bind_method(D_METHOD("get_active_material", "surface"), &MeshInstance3D::get_active_material);
 
 	ClassDB::bind_method(D_METHOD("create_trimesh_collision"), &MeshInstance3D::create_trimesh_collision);
diff --git a/scene/3d/mesh_instance_3d.h b/scene/3d/mesh_instance_3d.h
index eb300784b1..8aec227337 100644
--- a/scene/3d/mesh_instance_3d.h
+++ b/scene/3d/mesh_instance_3d.h
@@ -52,7 +52,7 @@ protected:
 	};
 
 	Map<StringName, BlendShapeTrack> blend_shape_tracks;
-	Vector<Ref<Material>> materials;
+	Vector<Ref<Material>> surface_override_materials;
 
 	void _mesh_changed();
 	void _resolve_skeleton_path();
@@ -75,9 +75,9 @@ public:
 	void set_skeleton_path(const NodePath &p_skeleton);
 	NodePath get_skeleton_path();
 
-	int get_surface_material_count() const;
-	void set_surface_material(int p_surface, const Ref<Material> &p_material);
-	Ref<Material> get_surface_material(int p_surface) const;
+	int get_surface_override_material_count() const;
+	void set_surface_override_material(int p_surface, const Ref<Material> &p_material);
+	Ref<Material> get_surface_override_material(int p_surface) const;
 	Ref<Material> get_active_material(int p_surface) const;
 
 	Node *create_trimesh_collision_node();
diff --git a/scene/3d/skeleton_3d.cpp b/scene/3d/skeleton_3d.cpp
index db5fc7593e..ebbb8985c9 100644
--- a/scene/3d/skeleton_3d.cpp
+++ b/scene/3d/skeleton_3d.cpp
@@ -387,6 +387,7 @@ void Skeleton3D::_notification(int p_what) {
 void Skeleton3D::clear_bones_global_pose_override() {
 	for (int i = 0; i < bones.size(); i += 1) {
 		bones.write[i].global_pose_override_amount = 0;
+		bones.write[i].global_pose_override_reset = true;
 	}
 	_make_dirty();
 }
diff --git a/scene/3d/skeleton_ik_3d.cpp b/scene/3d/skeleton_ik_3d.cpp
index 6cde6a9b17..898f94ccc1 100644
--- a/scene/3d/skeleton_ik_3d.cpp
+++ b/scene/3d/skeleton_ik_3d.cpp
@@ -270,7 +270,6 @@ void FabrikInverseKinematic::solve(Task *p_task, real_t blending_delta, bool ove
 		return; // Skip solving
 	}
 
-	// This line below is part of the problem - removing it fixes the issue with BoneAttachment nodes...
 	p_task->skeleton->set_bone_global_pose_override(p_task->chain.chain_root.bone, Transform(), 0.0, true);
 
 	if (p_task->chain.middle_chain_item) {
@@ -567,6 +566,9 @@ void SkeletonIK3D::start(bool p_one_time) {
 
 void SkeletonIK3D::stop() {
 	set_process_internal(false);
+	if (skeleton) {
+		skeleton->clear_bones_global_pose_override();
+	}
 }
 
 Transform SkeletonIK3D::_get_target_transform() {
diff --git a/scene/3d/soft_body_3d.cpp b/scene/3d/soft_body_3d.cpp
index 08e25b589e..98ac6aa65e 100644
--- a/scene/3d/soft_body_3d.cpp
+++ b/scene/3d/soft_body_3d.cpp
@@ -452,7 +452,7 @@ void SoftBody3D::become_mesh_owner() {
 		mesh_owner = true;
 
 		Vector<Ref<Material>> copy_materials;
-		copy_materials.append_array(materials);
+		copy_materials.append_array(surface_override_materials);
 
 		ERR_FAIL_COND(!mesh->get_surface_count());
 
@@ -472,7 +472,7 @@ void SoftBody3D::become_mesh_owner() {
 		set_mesh(soft_mesh);
 
 		for (int i = copy_materials.size() - 1; 0 <= i; --i) {
-			set_surface_material(i, copy_materials[i]);
+			set_surface_override_material(i, copy_materials[i]);
 		}
 	}
 }
diff --git a/scene/gui/line_edit.cpp b/scene/gui/line_edit.cpp
index 2d8eb3191c..124a07fa65 100644
--- a/scene/gui/line_edit.cpp
+++ b/scene/gui/line_edit.cpp
@@ -51,13 +51,13 @@ void LineEdit::_swap_current_input_direction() {
 	} else {
 		input_direction = TEXT_DIRECTION_LTR;
 	}
-	set_cursor_position(get_cursor_position());
+	set_caret_column(get_caret_column());
 	update();
 }
 
-void LineEdit::_move_cursor_left(bool p_select, bool p_move_by_word) {
+void LineEdit::_move_caret_left(bool p_select, bool p_move_by_word) {
 	if (selection.enabled && !p_select) {
-		set_cursor_position(selection.begin);
+		set_caret_column(selection.begin);
 		deselect();
 		return;
 	}
@@ -65,7 +65,7 @@ void LineEdit::_move_cursor_left(bool p_select, bool p_move_by_word) {
 	shift_selection_check_pre(p_select);
 
 	if (p_move_by_word) {
-		int cc = cursor_pos;
+		int cc = caret_column;
 
 		Vector<Vector2i> words = TS->shaped_text_get_word_breaks(text_rid);
 		for (int i = words.size() - 1; i >= 0; i--) {
@@ -75,21 +75,21 @@ void LineEdit::_move_cursor_left(bool p_select, bool p_move_by_word) {
 			}
 		}
 
-		set_cursor_position(cc);
+		set_caret_column(cc);
 	} else {
-		if (mid_grapheme_caret_enabled) {
-			set_cursor_position(get_cursor_position() - 1);
+		if (caret_mid_grapheme_enabled) {
+			set_caret_column(get_caret_column() - 1);
 		} else {
-			set_cursor_position(TS->shaped_text_prev_grapheme_pos(text_rid, get_cursor_position()));
+			set_caret_column(TS->shaped_text_prev_grapheme_pos(text_rid, get_caret_column()));
 		}
 	}
 
 	shift_selection_check_post(p_select);
 }
 
-void LineEdit::_move_cursor_right(bool p_select, bool p_move_by_word) {
+void LineEdit::_move_caret_right(bool p_select, bool p_move_by_word) {
 	if (selection.enabled && !p_select) {
-		set_cursor_position(selection.end);
+		set_caret_column(selection.end);
 		deselect();
 		return;
 	}
@@ -97,7 +97,7 @@ void LineEdit::_move_cursor_right(bool p_select, bool p_move_by_word) {
 	shift_selection_check_pre(p_select);
 
 	if (p_move_by_word) {
-		int cc = cursor_pos;
+		int cc = caret_column;
 
 		Vector<Vector2i> words = TS->shaped_text_get_word_breaks(text_rid);
 		for (int i = 0; i < words.size(); i++) {
@@ -107,27 +107,27 @@ void LineEdit::_move_cursor_right(bool p_select, bool p_move_by_word) {
 			}
 		}
 
-		set_cursor_position(cc);
+		set_caret_column(cc);
 	} else {
-		if (mid_grapheme_caret_enabled) {
-			set_cursor_position(get_cursor_position() + 1);
+		if (caret_mid_grapheme_enabled) {
+			set_caret_column(get_caret_column() + 1);
 		} else {
-			set_cursor_position(TS->shaped_text_next_grapheme_pos(text_rid, get_cursor_position()));
+			set_caret_column(TS->shaped_text_next_grapheme_pos(text_rid, get_caret_column()));
 		}
 	}
 
 	shift_selection_check_post(p_select);
 }
 
-void LineEdit::_move_cursor_start(bool p_select) {
+void LineEdit::_move_caret_start(bool p_select) {
 	shift_selection_check_pre(p_select);
-	set_cursor_position(0);
+	set_caret_column(0); // Move to the first column; the pre/post checks around this call extend or drop the selection depending on p_select.
 	shift_selection_check_post(p_select);
 }
 
-void LineEdit::_move_cursor_end(bool p_select) {
+void LineEdit::_move_caret_end(bool p_select) {
 	shift_selection_check_pre(p_select);
-	set_cursor_position(text.length());
+	set_caret_column(text.length()); // Move past the last character; the pre/post checks around this call extend or drop the selection depending on p_select.
 	shift_selection_check_post(p_select);
 }
 
@@ -138,7 +138,7 @@ void LineEdit::_backspace(bool p_word, bool p_all_to_left) {
 
 	if (p_all_to_left) {
 		deselect();
-		text = text.substr(0, cursor_pos);
+		text = text.substr(0, caret_column);
 		_text_changed();
 		return;
 	}
@@ -149,7 +149,7 @@ void LineEdit::_backspace(bool p_word, bool p_all_to_left) {
 	}
 
 	if (p_word) {
-		int cc = cursor_pos;
+		int cc = caret_column;
 
 		Vector<Vector2i> words = TS->shaped_text_get_word_breaks(text_rid);
 		for (int i = words.size() - 1; i >= 0; i--) {
@@ -158,9 +158,9 @@ void LineEdit::_backspace(bool p_word, bool p_all_to_left) {
 			}
 		}
 
-		delete_text(cc, cursor_pos);
+		delete_text(cc, caret_column);
 
-		set_cursor_position(cc);
+		set_caret_column(cc);
 	} else {
 		delete_char();
 	}
@@ -173,9 +173,9 @@ void LineEdit::_delete(bool p_word, bool p_all_to_right) {
 
 	if (p_all_to_right) {
 		deselect();
-		text = text.substr(cursor_pos, text.length() - cursor_pos);
+		text = text.substr(caret_column, text.length() - caret_column);
 		_shape();
-		set_cursor_position(0);
+		set_caret_column(0);
 		_text_changed();
 		return;
 	}
@@ -187,12 +187,12 @@ void LineEdit::_delete(bool p_word, bool p_all_to_right) {
 
 	int text_len = text.length();
 
-	if (cursor_pos == text_len) {
+	if (caret_column == text_len) {
 		return; // Nothing to do.
 	}
 
 	if (p_word) {
-		int cc = cursor_pos;
+		int cc = caret_column;
 		Vector<Vector2i> words = TS->shaped_text_get_word_breaks(text_rid);
 		for (int i = 0; i < words.size(); i++) {
 			if (words[i].y > cc) {
@@ -201,15 +201,15 @@ void LineEdit::_delete(bool p_word, bool p_all_to_right) {
 			}
 		}
 
-		delete_text(cursor_pos, cc);
+		delete_text(caret_column, cc);
 	} else {
-		if (mid_grapheme_caret_enabled) {
-			set_cursor_position(cursor_pos + 1);
+		if (caret_mid_grapheme_enabled) {
+			set_caret_column(caret_column + 1);
 			delete_char();
 		} else {
-			int cc = cursor_pos;
-			set_cursor_position(TS->shaped_text_next_grapheme_pos(text_rid, cursor_pos));
-			delete_text(cc, cursor_pos);
+			int cc = caret_column;
+			set_caret_column(TS->shaped_text_next_grapheme_pos(text_rid, caret_column));
+			delete_text(cc, caret_column);
 		}
 	}
 }
@@ -250,10 +250,10 @@ void LineEdit::_gui_input(Ref<InputEvent> p_event) {
 
 			shift_selection_check_pre(b->get_shift());
 
-			set_cursor_at_pixel_pos(b->get_position().x);
+			set_caret_at_pixel_pos(b->get_position().x);
 
 			if (b->get_shift()) {
-				selection_fill_at_cursor();
+				selection_fill_at_caret();
 				selection.creating = true;
 
 			} else {
@@ -265,18 +265,18 @@ void LineEdit::_gui_input(Ref<InputEvent> p_event) {
 						selection.end = text.length();
 						selection.doubleclick = true;
 						selection.last_dblclk = 0;
-						cursor_pos = selection.begin;
+						caret_column = selection.begin;
 					} else if (b->is_doubleclick()) {
 						// Double-click select word.
 						Vector<Vector2i> words = TS->shaped_text_get_word_breaks(text_rid);
 						for (int i = 0; i < words.size(); i++) {
-							if (words[i].x < cursor_pos && words[i].y > cursor_pos) {
+							if (words[i].x < caret_column && words[i].y > caret_column) {
 								selection.enabled = true;
 								selection.begin = words[i].x;
 								selection.end = words[i].y;
 								selection.doubleclick = true;
 								selection.last_dblclk = OS::get_singleton()->get_ticks_msec();
-								cursor_pos = selection.end;
+								caret_column = selection.end;
 								break;
 							}
 						}
@@ -285,9 +285,9 @@ void LineEdit::_gui_input(Ref<InputEvent> p_event) {
 
 				selection.drag_attempt = false;
 
-				if ((cursor_pos < selection.begin) || (cursor_pos > selection.end) || !selection.enabled) {
+				if ((caret_column < selection.begin) || (caret_column > selection.end) || !selection.enabled) {
 					deselect();
-					selection.cursor_start = cursor_pos;
+					selection.start_column = caret_column;
 					selection.creating = true;
 				} else if (selection.enabled) {
 					selection.drag_attempt = true;
@@ -331,8 +331,8 @@ void LineEdit::_gui_input(Ref<InputEvent> p_event) {
 
 		if (m->get_button_mask() & MOUSE_BUTTON_LEFT) {
 			if (selection.creating) {
-				set_cursor_at_pixel_pos(m->get_position().x);
-				selection_fill_at_cursor();
+				set_caret_at_pixel_pos(m->get_position().x);
+				selection_fill_at_caret();
 			}
 		}
 	}
@@ -346,7 +346,7 @@ void LineEdit::_gui_input(Ref<InputEvent> p_event) {
 
 		if (context_menu_enabled) {
 			if (k->is_action("ui_menu", true)) {
-				Point2 pos = Point2(get_cursor_pixel_pos().x, (get_size().y + get_theme_font("font")->get_height(get_theme_font_size("font_size"))) / 2);
+				Point2 pos = Point2(get_caret_pixel_pos().x, (get_size().y + get_theme_font("font")->get_height(get_theme_font_size("font_size"))) / 2);
 				menu->set_position(get_global_transform().xform(pos));
 				menu->set_size(Vector2(1, 1));
 				_generate_context_menu();
@@ -445,34 +445,34 @@ void LineEdit::_gui_input(Ref<InputEvent> p_event) {
 		k->set_shift(false);
 
 		if (k->is_action("ui_text_caret_word_left", true)) {
-			_move_cursor_left(shift_pressed, true);
+			_move_caret_left(shift_pressed, true);
 			accept_event();
 			return;
 		}
 		if (k->is_action("ui_text_caret_left", true)) {
-			_move_cursor_left(shift_pressed);
+			_move_caret_left(shift_pressed);
 			accept_event();
 			return;
 		}
 		if (k->is_action("ui_text_caret_word_right", true)) {
-			_move_cursor_right(shift_pressed, true);
+			_move_caret_right(shift_pressed, true);
 			accept_event();
 			return;
 		}
 		if (k->is_action("ui_text_caret_right", true)) {
-			_move_cursor_right(shift_pressed, false);
+			_move_caret_right(shift_pressed, false);
 			accept_event();
 			return;
 		}
 
 		// Up = Home, Down = End
 		if (k->is_action("ui_text_caret_up", true) || k->is_action("ui_text_caret_line_start", true) || k->is_action("ui_text_caret_page_up", true)) {
-			_move_cursor_start(shift_pressed);
+			_move_caret_start(shift_pressed);
 			accept_event();
 			return;
 		}
 		if (k->is_action("ui_text_caret_down", true) || k->is_action("ui_text_caret_line_end", true) || k->is_action("ui_text_caret_page_down", true)) {
-			_move_cursor_end(shift_pressed);
+			_move_caret_end(shift_pressed);
 			accept_event();
 			return;
 		}
@@ -495,7 +495,7 @@ void LineEdit::_gui_input(Ref<InputEvent> p_event) {
 			selection_delete();
 			char32_t ucodestr[2] = { (char32_t)k->get_unicode(), 0 };
 			int prev_len = text.length();
-			append_at_cursor(ucodestr);
+			insert_text_at_caret(ucodestr);
 			if (text.length() != prev_len) {
 				_text_changed();
 			}
@@ -542,15 +542,15 @@ void LineEdit::drop_data(const Point2 &p_point, const Variant &p_data) {
 	Control::drop_data(p_point, p_data);
 
 	if (p_data.get_type() == Variant::STRING) {
-		set_cursor_at_pixel_pos(p_point.x);
+		set_caret_at_pixel_pos(p_point.x);
 		int selected = selection.end - selection.begin;
 
 		text.erase(selection.begin, selected);
 		_shape();
 
-		append_at_cursor(p_data);
-		selection.begin = cursor_pos - selected;
-		selection.end = cursor_pos;
+		insert_text_at_caret(p_data);
+		selection.begin = caret_column - selected;
+		selection.end = caret_column;
 	}
 }
 
@@ -575,8 +575,8 @@ void LineEdit::_notification(int p_what) {
 #ifdef TOOLS_ENABLED
 		case NOTIFICATION_ENTER_TREE: {
 			if (Engine::get_singleton()->is_editor_hint() && !get_tree()->is_node_being_edited(this)) {
-				cursor_set_blink_enabled(EDITOR_DEF("text_editor/cursor/caret_blink", false));
-				cursor_set_blink_speed(EDITOR_DEF("text_editor/cursor/caret_blink_speed", 0.65));
+				set_caret_blink_enabled(EDITOR_DEF("text_editor/cursor/caret_blink", false));
+				set_caret_blink_speed(EDITOR_DEF("text_editor/cursor/caret_blink_speed", 0.65));
 
 				if (!EditorSettings::get_singleton()->is_connected("settings_changed", callable_mp(this, &LineEdit::_editor_settings_changed))) {
 					EditorSettings::get_singleton()->connect("settings_changed", callable_mp(this, &LineEdit::_editor_settings_changed));
@@ -587,7 +587,7 @@ void LineEdit::_notification(int p_what) {
 		case NOTIFICATION_RESIZED: {
 			_fit_to_width();
 			scroll_offset = 0;
-			set_cursor_position(get_cursor_position());
+			set_caret_column(get_caret_column());
 		} break;
 		case NOTIFICATION_LAYOUT_DIRECTION_CHANGED:
 		case NOTIFICATION_THEME_CHANGED: {
@@ -674,7 +674,7 @@ void LineEdit::_notification(int p_what) {
 			Color selection_color = get_theme_color("selection_color");
 			Color font_color = is_editable() ? get_theme_color("font_color") : get_theme_color("font_uneditable_color");
 			Color font_selected_color = get_theme_color("font_selected_color");
-			Color cursor_color = get_theme_color("cursor_color");
+			Color caret_color = get_theme_color("caret_color");
 
 			// Draw placeholder color.
 			if (using_placeholder) {
@@ -778,7 +778,7 @@ void LineEdit::_notification(int p_what) {
 					// Normal caret.
 					Rect2 l_caret, t_caret;
 					TextServer::Direction l_dir, t_dir;
-					TS->shaped_text_get_carets(text_rid, cursor_pos, l_caret, l_dir, t_caret, t_dir);
+					TS->shaped_text_get_carets(text_rid, caret_column, l_caret, l_dir, t_caret, t_dir);
 
 					if (l_caret == Rect2() && t_caret == Rect2()) {
 						// No carets, add one at the start.
@@ -791,28 +791,28 @@ void LineEdit::_notification(int p_what) {
 							l_dir = TextServer::DIRECTION_LTR;
 							l_caret = Rect2(Vector2(x_ofs, y), Size2(caret_width, h));
 						}
-						RenderingServer::get_singleton()->canvas_item_add_rect(ci, l_caret, cursor_color);
+						RenderingServer::get_singleton()->canvas_item_add_rect(ci, l_caret, caret_color);
 					} else {
 						if (l_caret != Rect2() && l_dir == TextServer::DIRECTION_AUTO) {
 							// Draw extra marker on top of mid caret.
 							Rect2 trect = Rect2(l_caret.position.x - 3 * caret_width, l_caret.position.y, 6 * caret_width, caret_width);
 							trect.position += ofs;
-							RenderingServer::get_singleton()->canvas_item_add_rect(ci, trect, cursor_color);
+							RenderingServer::get_singleton()->canvas_item_add_rect(ci, trect, caret_color);
 						}
 
 						l_caret.position += ofs;
 						l_caret.size.x = caret_width;
-						RenderingServer::get_singleton()->canvas_item_add_rect(ci, l_caret, cursor_color);
+						RenderingServer::get_singleton()->canvas_item_add_rect(ci, l_caret, caret_color);
 
 						t_caret.position += ofs;
 						t_caret.size.x = caret_width;
 
-						RenderingServer::get_singleton()->canvas_item_add_rect(ci, t_caret, cursor_color);
+						RenderingServer::get_singleton()->canvas_item_add_rect(ci, t_caret, caret_color);
 					}
 				} else {
 					{
 						// IME intermediate text range.
-						Vector<Vector2> sel = TS->shaped_text_get_selection(text_rid, cursor_pos, cursor_pos + ime_text.length());
+						Vector<Vector2> sel = TS->shaped_text_get_selection(text_rid, caret_column, caret_column + ime_text.length());
 						for (int i = 0; i < sel.size(); i++) {
 							Rect2 rect = Rect2(sel[i].x + ofs.x, ofs.y, sel[i].y - sel[i].x, text_height);
 							if (rect.position.x + rect.size.x <= x_ofs || rect.position.x > ofs_max) {
@@ -825,12 +825,12 @@ void LineEdit::_notification(int p_what) {
 								rect.size.x = ofs_max - rect.position.x;
 							}
 							rect.size.y = caret_width;
-							RenderingServer::get_singleton()->canvas_item_add_rect(ci, rect, cursor_color);
+							RenderingServer::get_singleton()->canvas_item_add_rect(ci, rect, caret_color);
 						}
 					}
 					{
 						// IME caret.
-						Vector<Vector2> sel = TS->shaped_text_get_selection(text_rid, cursor_pos + ime_selection.x, cursor_pos + ime_selection.x + ime_selection.y);
+						Vector<Vector2> sel = TS->shaped_text_get_selection(text_rid, caret_column + ime_selection.x, caret_column + ime_selection.x + ime_selection.y);
 						for (int i = 0; i < sel.size(); i++) {
 							Rect2 rect = Rect2(sel[i].x + ofs.x, ofs.y, sel[i].y - sel[i].x, text_height);
 							if (rect.position.x + rect.size.x <= x_ofs || rect.position.x > ofs_max) {
@@ -843,7 +843,7 @@ void LineEdit::_notification(int p_what) {
 								rect.size.x = ofs_max - rect.position.x;
 							}
 							rect.size.y = caret_width * 3;
-							RenderingServer::get_singleton()->canvas_item_add_rect(ci, rect, cursor_color);
+							RenderingServer::get_singleton()->canvas_item_add_rect(ci, rect, caret_color);
 						}
 					}
 				}
@@ -869,8 +869,8 @@ void LineEdit::_notification(int p_what) {
 
 			if (get_viewport()->get_window_id() != DisplayServer::INVALID_WINDOW_ID && DisplayServer::get_singleton()->has_feature(DisplayServer::FEATURE_IME)) {
 				DisplayServer::get_singleton()->window_set_ime_active(true, get_viewport()->get_window_id());
-				Point2 cursor_pos = Point2(get_cursor_position(), 1) * get_minimum_size().height;
-				DisplayServer::get_singleton()->window_set_ime_position(get_global_position() + cursor_pos, get_viewport()->get_window_id());
+				Point2 ime_offset = Point2(get_caret_column(), 1) * get_minimum_size().height; // Offset added to the control's global position for the IME window; named so it does not shadow the int caret_column member.
+				DisplayServer::get_singleton()->window_set_ime_position(get_global_position() + ime_offset, get_viewport()->get_window_id());
 			}
 
 			show_virtual_keyboard();
@@ -887,7 +887,7 @@ void LineEdit::_notification(int p_what) {
 			ime_text = "";
 			ime_selection = Point2();
 			_shape();
-			set_cursor_position(cursor_pos); // Update scroll_offset
+			set_caret_column(caret_column); // Update scroll_offset
 
 			if (DisplayServer::get_singleton()->has_feature(DisplayServer::FEATURE_VIRTUAL_KEYBOARD) && virtual_keyboard_enabled) {
 				DisplayServer::get_singleton()->virtual_keyboard_hide();
@@ -899,7 +899,7 @@ void LineEdit::_notification(int p_what) {
 				ime_text = DisplayServer::get_singleton()->ime_get_text();
 				ime_selection = DisplayServer::get_singleton()->ime_get_selection();
 				_shape();
-				set_cursor_position(cursor_pos); // Update scroll_offset
+				set_caret_column(caret_column); // Update scroll_offset
 
 				update();
 			}
@@ -933,7 +933,7 @@ void LineEdit::paste_text() {
 		if (selection.enabled) {
 			selection_delete();
 		}
-		append_at_cursor(paste_buffer);
+		insert_text_at_caret(paste_buffer);
 
 		if (!text_changed_dirty) {
 			if (is_inside_tree() && text.length() != prev_len) {
@@ -961,7 +961,7 @@ void LineEdit::undo() {
 	TextOperation op = undo_stack_pos->get();
 	text = op.text;
 	scroll_offset = op.scroll_offset;
-	set_cursor_position(op.cursor_pos);
+	set_caret_column(op.caret_column);
 
 	_shape();
 	_emit_text_change();
@@ -982,7 +982,7 @@ void LineEdit::redo() {
 	TextOperation op = undo_stack_pos->get();
 	text = op.text;
 	scroll_offset = op.scroll_offset;
-	set_cursor_position(op.cursor_pos);
+	set_caret_column(op.caret_column);
 
 	_shape();
 	_emit_text_change();
@@ -990,7 +990,7 @@ void LineEdit::redo() {
 
 void LineEdit::shift_selection_check_pre(bool p_shift) {
 	if (!selection.enabled && p_shift) {
-		selection.cursor_start = cursor_pos;
+		selection.start_column = caret_column;
 	}
 	if (!p_shift) {
 		deselect();
@@ -999,11 +999,11 @@ void LineEdit::shift_selection_check_pre(bool p_shift) {
 
 void LineEdit::shift_selection_check_post(bool p_shift) {
 	if (p_shift) {
-		selection_fill_at_cursor();
+		selection_fill_at_caret(); // Re-span the selection between selection.start_column and the caret's new column (does nothing when selecting is disabled).
 	}
 }
 
-void LineEdit::set_cursor_at_pixel_pos(int p_x) {
+void LineEdit::set_caret_at_pixel_pos(int p_x) {
 	Ref<StyleBox> style = get_theme_stylebox("normal");
 	bool rtl = is_layout_rtl();
 
@@ -1048,10 +1048,10 @@ void LineEdit::set_cursor_at_pixel_pos(int p_x) {
 	}
 
 	int ofs = TS->shaped_text_hit_test_position(text_rid, p_x - x_ofs - scroll_offset);
-	set_cursor_position(ofs);
+	set_caret_column(ofs);
 }
 
-Vector2i LineEdit::get_cursor_pixel_pos() {
+Vector2i LineEdit::get_caret_pixel_pos() {
 	Ref<StyleBox> style = get_theme_stylebox("normal");
 	bool rtl = is_layout_rtl();
 
@@ -1100,9 +1100,9 @@ Vector2i LineEdit::get_cursor_pixel_pos() {
 	TextServer::Direction l_dir, t_dir;
 	// Get position of the start of caret.
 	if (ime_text.length() != 0 && ime_selection.x != 0) {
-		TS->shaped_text_get_carets(text_rid, cursor_pos + ime_selection.x, l_caret, l_dir, t_caret, t_dir);
+		TS->shaped_text_get_carets(text_rid, caret_column + ime_selection.x, l_caret, l_dir, t_caret, t_dir);
 	} else {
-		TS->shaped_text_get_carets(text_rid, cursor_pos, l_caret, l_dir, t_caret, t_dir);
+		TS->shaped_text_get_carets(text_rid, caret_column, l_caret, l_dir, t_caret, t_dir);
 	}
 
 	if ((l_caret != Rect2() && (l_dir == TextServer::DIRECTION_AUTO || l_dir == (TextServer::Direction)input_direction)) || (t_caret == Rect2())) {
@@ -1114,9 +1114,9 @@ Vector2i LineEdit::get_cursor_pixel_pos() {
 	// Get position of the end of caret.
 	if (ime_text.length() != 0) {
 		if (ime_selection.y != 0) {
-			TS->shaped_text_get_carets(text_rid, cursor_pos + ime_selection.x + ime_selection.y, l_caret, l_dir, t_caret, t_dir);
+			TS->shaped_text_get_carets(text_rid, caret_column + ime_selection.x + ime_selection.y, l_caret, l_dir, t_caret, t_dir);
 		} else {
-			TS->shaped_text_get_carets(text_rid, cursor_pos + ime_text.size(), l_caret, l_dir, t_caret, t_dir);
+			TS->shaped_text_get_carets(text_rid, caret_column + ime_text.size(), l_caret, l_dir, t_caret, t_dir);
 		}
 		if ((l_caret != Rect2() && (l_dir == TextServer::DIRECTION_AUTO || l_dir == (TextServer::Direction)input_direction)) || (t_caret == Rect2())) {
 			ret.y = x_ofs + l_caret.position.x + scroll_offset;
@@ -1130,19 +1130,19 @@ Vector2i LineEdit::get_cursor_pixel_pos() {
 	return ret;
 }
 
-void LineEdit::set_mid_grapheme_caret_enabled(const bool p_enabled) {
-	mid_grapheme_caret_enabled = p_enabled;
+void LineEdit::set_caret_mid_grapheme_enabled(const bool p_enabled) {
+	caret_mid_grapheme_enabled = p_enabled; // When true, left/right movement and forward delete step by one column instead of jumping to the previous/next grapheme position.
 }
 
-bool LineEdit::get_mid_grapheme_caret_enabled() const {
-	return mid_grapheme_caret_enabled;
+bool LineEdit::is_caret_mid_grapheme_enabled() const {
+	return caret_mid_grapheme_enabled;
 }
 
-bool LineEdit::cursor_get_blink_enabled() const {
+bool LineEdit::is_caret_blink_enabled() const {
 	return caret_blink_enabled;
 }
 
-void LineEdit::cursor_set_blink_enabled(const bool p_enabled) {
+void LineEdit::set_caret_blink_enabled(const bool p_enabled) {
 	caret_blink_enabled = p_enabled;
 
 	if (has_focus() || caret_force_displayed) {
@@ -1160,21 +1160,21 @@ void LineEdit::cursor_set_blink_enabled(const bool p_enabled) {
 	notify_property_list_changed();
 }
 
-bool LineEdit::cursor_get_force_displayed() const {
+bool LineEdit::is_caret_force_displayed() const {
 	return caret_force_displayed;
 }
 
-void LineEdit::cursor_set_force_displayed(const bool p_enabled) {
+void LineEdit::set_caret_force_displayed(const bool p_enabled) {
 	caret_force_displayed = p_enabled;
-	cursor_set_blink_enabled(caret_blink_enabled);
+	set_caret_blink_enabled(caret_blink_enabled); // Re-apply the unchanged blink setting; that setter branches on caret_force_displayed, which was just updated.
 	update();
 }
 
-float LineEdit::cursor_get_blink_speed() const {
+float LineEdit::get_caret_blink_speed() const {
 	return caret_blink_timer->get_wait_time();
 }
 
-void LineEdit::cursor_set_blink_speed(const float p_speed) {
+void LineEdit::set_caret_blink_speed(const float p_speed) {
 	ERR_FAIL_COND(p_speed <= 0);
 	caret_blink_timer->set_wait_time(p_speed);
 }
@@ -1198,14 +1198,14 @@ void LineEdit::_toggle_draw_caret() {
 }
 
 void LineEdit::delete_char() {
-	if ((text.length() <= 0) || (cursor_pos == 0)) {
+	if ((text.length() <= 0) || (caret_column == 0)) { // Empty text or caret at column 0: nothing precedes the caret to delete.
 		return;
 	}
 
-	text.erase(cursor_pos - 1, 1);
+	text.erase(caret_column - 1, 1); // Remove the single character immediately before the caret.
 	_shape();
 
-	set_cursor_position(get_cursor_position() - 1);
+	set_caret_column(get_caret_column() - 1); // Step the caret back by one to where the erased character was.
 
 	_text_changed();
 }
@@ -1217,10 +1217,10 @@ void LineEdit::delete_text(int p_from_column, int p_to_column) {
 	text.erase(p_from_column, p_to_column - p_from_column);
 	_shape();
 
-	cursor_pos -= CLAMP(cursor_pos - p_from_column, 0, p_to_column - p_from_column);
+	caret_column -= CLAMP(caret_column - p_from_column, 0, p_to_column - p_from_column);
 
-	if (cursor_pos >= text.length()) {
-		cursor_pos = text.length();
+	if (caret_column >= text.length()) {
+		caret_column = text.length();
 	}
 
 	if (!text_changed_dirty) {
@@ -1233,10 +1233,11 @@ void LineEdit::delete_text(int p_from_column, int p_to_column) {
 
 void LineEdit::set_text(String p_text) {
 	clear_internal();
-	append_at_cursor(p_text);
+	insert_text_at_caret(p_text); // clear_internal() emptied the text and reset the caret to column 0, so this inserts into an empty line.
+	_create_undo_state(); // Seed the undo stack (emptied by clear_internal()) with the newly set text.
 
 	update();
-	cursor_pos = 0;
+	caret_column = 0; // Assigned directly rather than through set_caret_column(); scroll_offset is reset explicitly on the next line.
 	scroll_offset = 0;
 }
 
@@ -1346,7 +1347,7 @@ void LineEdit::show_virtual_keyboard() {
 		if (selection.enabled) {
 			DisplayServer::get_singleton()->virtual_keyboard_show(text, get_global_rect(), false, max_length, selection.begin, selection.end);
 		} else {
-			DisplayServer::get_singleton()->virtual_keyboard_show(text, get_global_rect(), false, max_length, cursor_pos);
+			DisplayServer::get_singleton()->virtual_keyboard_show(text, get_global_rect(), false, max_length, caret_column);
 		}
 	}
 }
@@ -1375,16 +1376,16 @@ float LineEdit::get_placeholder_alpha() const {
 	return placeholder_alpha;
 }
 
-void LineEdit::set_cursor_position(int p_pos) {
-	if (p_pos > (int)text.length()) {
-		p_pos = text.length();
+void LineEdit::set_caret_column(int p_column) {
+	if (p_column > (int)text.length()) {
+		p_column = text.length();
 	}
 
-	if (p_pos < 0) {
-		p_pos = 0;
+	if (p_column < 0) {
+		p_column = 0;
 	}
 
-	cursor_pos = p_pos;
+	caret_column = p_column;
 
 	// Fit to window.
 
@@ -1439,7 +1440,7 @@ void LineEdit::set_cursor_position(int p_pos) {
 	}
 
 	// Note: Use two coordinates to fit IME input range.
-	Vector2i primary_catret_offset = get_cursor_pixel_pos();
+	Vector2i primary_catret_offset = get_caret_pixel_pos();
 
 	if (MIN(primary_catret_offset.x, primary_catret_offset.y) <= x_ofs) {
 		scroll_offset += (x_ofs - MIN(primary_catret_offset.x, primary_catret_offset.y));
@@ -1451,8 +1452,8 @@ void LineEdit::set_cursor_position(int p_pos) {
 	update();
 }
 
-int LineEdit::get_cursor_position() const {
-	return cursor_pos;
+int LineEdit::get_caret_column() const {
+	return caret_column;
 }
 
 void LineEdit::set_scroll_offset(int p_pos) {
@@ -1466,17 +1467,17 @@ int LineEdit::get_scroll_offset() const {
 	return scroll_offset;
 }
 
-void LineEdit::append_at_cursor(String p_text) {
+void LineEdit::insert_text_at_caret(String p_text) {
 	if ((max_length <= 0) || (text.length() + p_text.length() <= max_length)) {
-		String pre = text.substr(0, cursor_pos);
-		String post = text.substr(cursor_pos, text.length() - cursor_pos);
+		String pre = text.substr(0, caret_column);
+		String post = text.substr(caret_column, text.length() - caret_column);
 		text = pre + p_text + post;
 		_shape();
-		TextServer::Direction dir = TS->shaped_text_get_dominant_direciton_in_range(text_rid, cursor_pos, cursor_pos + p_text.length());
+		TextServer::Direction dir = TS->shaped_text_get_dominant_direciton_in_range(text_rid, caret_column, caret_column + p_text.length());
 		if (dir != TextServer::DIRECTION_AUTO) {
 			input_direction = (TextDirection)dir;
 		}
-		set_cursor_position(cursor_pos + p_text.length());
+		set_caret_column(caret_column + p_text.length());
 	} else {
 		emit_signal("text_change_rejected");
 	}
@@ -1485,7 +1486,7 @@ void LineEdit::append_at_cursor(String p_text) {
 void LineEdit::clear_internal() {
 	deselect();
 	_clear_undo_stack();
-	cursor_pos = 0;
+	caret_column = 0;
 	scroll_offset = 0;
 	undo_text = "";
 	text = "";
@@ -1505,7 +1506,7 @@ Size2 LineEdit::get_minimum_size() const {
 	min_size.width = get_theme_constant("minimum_character_width") * em_space_size;
 
 	if (expand_to_text_length) {
-		// Add a space because some fonts are too exact, and because cursor needs a bit more when at the end.
+		// Add a space because some fonts are too exact, and because caret needs a bit more when at the end.
 		min_size.width = MAX(min_size.width, full_width + em_space_size);
 	}
 
@@ -1526,7 +1527,7 @@ Size2 LineEdit::get_minimum_size() const {
 void LineEdit::deselect() {
 	selection.begin = 0;
 	selection.end = 0;
-	selection.cursor_start = 0;
+	selection.start_column = 0;
 	selection.enabled = false;
 	selection.creating = false;
 	selection.doubleclick = false;
@@ -1551,13 +1552,13 @@ int LineEdit::get_max_length() const {
 	return max_length;
 }
 
-void LineEdit::selection_fill_at_cursor() {
+void LineEdit::selection_fill_at_caret() {
 	if (!selecting_enabled) {
 		return;
 	}
 
-	selection.begin = cursor_pos;
-	selection.end = selection.cursor_start;
+	selection.begin = caret_column;
+	selection.end = selection.start_column;
 
 	if (selection.end < selection.begin) {
 		int aux = selection.end;
@@ -1714,82 +1715,82 @@ void LineEdit::menu_option(int p_option) {
 		} break;
 		case MENU_INSERT_LRM: {
 			if (editable) {
-				append_at_cursor(String::chr(0x200E));
+				insert_text_at_caret(String::chr(0x200E));
 			}
 		} break;
 		case MENU_INSERT_RLM: {
 			if (editable) {
-				append_at_cursor(String::chr(0x200F));
+				insert_text_at_caret(String::chr(0x200F));
 			}
 		} break;
 		case MENU_INSERT_LRE: {
 			if (editable) {
-				append_at_cursor(String::chr(0x202A));
+				insert_text_at_caret(String::chr(0x202A));
 			}
 		} break;
 		case MENU_INSERT_RLE: {
 			if (editable) {
-				append_at_cursor(String::chr(0x202B));
+				insert_text_at_caret(String::chr(0x202B));
 			}
 		} break;
 		case MENU_INSERT_LRO: {
 			if (editable) {
-				append_at_cursor(String::chr(0x202D));
+				insert_text_at_caret(String::chr(0x202D));
 			}
 		} break;
 		case MENU_INSERT_RLO: {
 			if (editable) {
-				append_at_cursor(String::chr(0x202E));
+				insert_text_at_caret(String::chr(0x202E));
 			}
 		} break;
 		case MENU_INSERT_PDF: {
 			if (editable) {
-				append_at_cursor(String::chr(0x202C));
+				insert_text_at_caret(String::chr(0x202C));
 			}
 		} break;
 		case MENU_INSERT_ALM: {
 			if (editable) {
-				append_at_cursor(String::chr(0x061C));
+				insert_text_at_caret(String::chr(0x061C));
 			}
 		} break;
 		case MENU_INSERT_LRI: {
 			if (editable) {
-				append_at_cursor(String::chr(0x2066));
+				insert_text_at_caret(String::chr(0x2066));
 			}
 		} break;
 		case MENU_INSERT_RLI: {
 			if (editable) {
-				append_at_cursor(String::chr(0x2067));
+				insert_text_at_caret(String::chr(0x2067));
 			}
 		} break;
 		case MENU_INSERT_FSI: {
 			if (editable) {
-				append_at_cursor(String::chr(0x2068));
+				insert_text_at_caret(String::chr(0x2068));
 			}
 		} break;
 		case MENU_INSERT_PDI: {
 			if (editable) {
-				append_at_cursor(String::chr(0x2069));
+				insert_text_at_caret(String::chr(0x2069));
 			}
 		} break;
 		case MENU_INSERT_ZWJ: {
 			if (editable) {
-				append_at_cursor(String::chr(0x200D));
+				insert_text_at_caret(String::chr(0x200D));
 			}
 		} break;
 		case MENU_INSERT_ZWNJ: {
 			if (editable) {
-				append_at_cursor(String::chr(0x200C));
+				insert_text_at_caret(String::chr(0x200C));
 			}
 		} break;
 		case MENU_INSERT_WJ: {
 			if (editable) {
-				append_at_cursor(String::chr(0x2060));
+				insert_text_at_caret(String::chr(0x2060));
 			}
 		} break;
 		case MENU_INSERT_SHY: {
 			if (editable) {
-				append_at_cursor(String::chr(0x00AD));
+				insert_text_at_caret(String::chr(0x00AD));
 			}
 		}
 	}
@@ -1809,18 +1810,18 @@ PopupMenu *LineEdit::get_menu() const {
 
 void LineEdit::_editor_settings_changed() {
 #ifdef TOOLS_ENABLED
-	cursor_set_blink_enabled(EDITOR_DEF("text_editor/cursor/caret_blink", false));
-	cursor_set_blink_speed(EDITOR_DEF("text_editor/cursor/caret_blink_speed", 0.65));
+	set_caret_blink_enabled(EDITOR_DEF("text_editor/cursor/caret_blink", false)); // Only the C++ setter names changed; the editor setting paths still contain "cursor".
+	set_caret_blink_speed(EDITOR_DEF("text_editor/cursor/caret_blink_speed", 0.65)); // 0.65 is the fallback passed to EDITOR_DEF.
 #endif
 }
 
-void LineEdit::set_expand_to_text_length(bool p_enabled) {
+void LineEdit::set_expand_to_text_length_enabled(bool p_enabled) {
 	expand_to_text_length = p_enabled;
 	minimum_size_changed();
-	set_cursor_position(cursor_pos);
+	set_caret_column(caret_column); // Same column; called for its side effect of re-fitting scroll_offset.
 }
 
-bool LineEdit::get_expand_to_text_length() const {
+bool LineEdit::is_expand_to_text_length_enabled() const {
 	return expand_to_text_length;
 }
 
@@ -1905,7 +1906,7 @@ void LineEdit::_shape() {
 		t = secret_character.repeat(text.length() + ime_text.length());
 	} else {
 		if (ime_text.length() > 0) {
-			t = text.substr(0, cursor_pos) + ime_text + text.substr(cursor_pos, text.length());
+			t = text.substr(0, caret_column) + ime_text + text.substr(caret_column, text.length());
 		} else {
 			t = text;
 		}
@@ -1970,7 +1971,7 @@ void LineEdit::_clear_undo_stack() {
 void LineEdit::_create_undo_state() {
 	TextOperation op;
 	op.text = text;
-	op.cursor_pos = cursor_pos;
+	op.caret_column = caret_column; // Snapshot of the caret; undo() and redo() restore it through set_caret_column().
 	op.scroll_offset = scroll_offset;
 	undo_stack.push_back(op);
 }
@@ -2115,23 +2116,23 @@ void LineEdit::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_placeholder"), &LineEdit::get_placeholder);
 	ClassDB::bind_method(D_METHOD("set_placeholder_alpha", "alpha"), &LineEdit::set_placeholder_alpha);
 	ClassDB::bind_method(D_METHOD("get_placeholder_alpha"), &LineEdit::get_placeholder_alpha);
-	ClassDB::bind_method(D_METHOD("set_cursor_position", "position"), &LineEdit::set_cursor_position);
-	ClassDB::bind_method(D_METHOD("get_cursor_position"), &LineEdit::get_cursor_position);
+	ClassDB::bind_method(D_METHOD("set_caret_column", "position"), &LineEdit::set_caret_column);
+	ClassDB::bind_method(D_METHOD("get_caret_column"), &LineEdit::get_caret_column);
 	ClassDB::bind_method(D_METHOD("get_scroll_offset"), &LineEdit::get_scroll_offset);
-	ClassDB::bind_method(D_METHOD("set_expand_to_text_length", "enabled"), &LineEdit::set_expand_to_text_length);
-	ClassDB::bind_method(D_METHOD("get_expand_to_text_length"), &LineEdit::get_expand_to_text_length);
-	ClassDB::bind_method(D_METHOD("cursor_set_blink_enabled", "enabled"), &LineEdit::cursor_set_blink_enabled);
-	ClassDB::bind_method(D_METHOD("cursor_get_blink_enabled"), &LineEdit::cursor_get_blink_enabled);
-	ClassDB::bind_method(D_METHOD("set_mid_grapheme_caret_enabled", "enabled"), &LineEdit::set_mid_grapheme_caret_enabled);
-	ClassDB::bind_method(D_METHOD("get_mid_grapheme_caret_enabled"), &LineEdit::get_mid_grapheme_caret_enabled);
-	ClassDB::bind_method(D_METHOD("cursor_set_force_displayed", "enabled"), &LineEdit::cursor_set_force_displayed);
-	ClassDB::bind_method(D_METHOD("cursor_get_force_displayed"), &LineEdit::cursor_get_force_displayed);
-	ClassDB::bind_method(D_METHOD("cursor_set_blink_speed", "blink_speed"), &LineEdit::cursor_set_blink_speed);
-	ClassDB::bind_method(D_METHOD("cursor_get_blink_speed"), &LineEdit::cursor_get_blink_speed);
+	ClassDB::bind_method(D_METHOD("set_expand_to_text_length_enabled", "enabled"), &LineEdit::set_expand_to_text_length_enabled);
+	ClassDB::bind_method(D_METHOD("is_expand_to_text_length_enabled"), &LineEdit::is_expand_to_text_length_enabled);
+	ClassDB::bind_method(D_METHOD("set_caret_blink_enabled", "enabled"), &LineEdit::set_caret_blink_enabled);
+	ClassDB::bind_method(D_METHOD("is_caret_blink_enabled"), &LineEdit::is_caret_blink_enabled);
+	ClassDB::bind_method(D_METHOD("set_caret_mid_grapheme_enabled", "enabled"), &LineEdit::set_caret_mid_grapheme_enabled);
+	ClassDB::bind_method(D_METHOD("is_caret_mid_grapheme_enabled"), &LineEdit::is_caret_mid_grapheme_enabled);
+	ClassDB::bind_method(D_METHOD("set_caret_force_displayed", "enabled"), &LineEdit::set_caret_force_displayed);
+	ClassDB::bind_method(D_METHOD("is_caret_force_displayed"), &LineEdit::is_caret_force_displayed);
+	ClassDB::bind_method(D_METHOD("set_caret_blink_speed", "blink_speed"), &LineEdit::set_caret_blink_speed);
+	ClassDB::bind_method(D_METHOD("get_caret_blink_speed"), &LineEdit::get_caret_blink_speed);
 	ClassDB::bind_method(D_METHOD("set_max_length", "chars"), &LineEdit::set_max_length);
 	ClassDB::bind_method(D_METHOD("get_max_length"), &LineEdit::get_max_length);
-	ClassDB::bind_method(D_METHOD("append_at_cursor", "text"), &LineEdit::append_at_cursor);
-	ClassDB::bind_method(D_METHOD("delete_char_at_cursor"), &LineEdit::delete_char);
+	ClassDB::bind_method(D_METHOD("insert_text_at_caret", "text"), &LineEdit::insert_text_at_caret);
+	ClassDB::bind_method(D_METHOD("delete_char_at_caret"), &LineEdit::delete_char);
 	ClassDB::bind_method(D_METHOD("delete_text", "from_column", "to_column"), &LineEdit::delete_text);
 	ClassDB::bind_method(D_METHOD("set_editable", "enabled"), &LineEdit::set_editable);
 	ClassDB::bind_method(D_METHOD("is_editable"), &LineEdit::is_editable);
@@ -2199,7 +2200,7 @@ void LineEdit::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "editable"), "set_editable", "is_editable");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "secret"), "set_secret", "is_secret");
 	ADD_PROPERTY(PropertyInfo(Variant::STRING, "secret_character"), "set_secret_character", "get_secret_character");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "expand_to_text_length"), "set_expand_to_text_length", "get_expand_to_text_length");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "expand_to_text_length"), "set_expand_to_text_length_enabled", "is_expand_to_text_length_enabled");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "context_menu_enabled"), "set_context_menu_enabled", "is_context_menu_enabled");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "virtual_keyboard_enabled"), "set_virtual_keyboard_enabled", "is_virtual_keyboard_enabled");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "clear_button_enabled"), "set_clear_button_enabled", "is_clear_button_enabled");
@@ -2216,11 +2217,11 @@ void LineEdit::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::STRING, "placeholder_text"), "set_placeholder", "get_placeholder");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "placeholder_alpha", PROPERTY_HINT_RANGE, "0,1,0.001"), "set_placeholder_alpha", "get_placeholder_alpha");
 	ADD_GROUP("Caret", "caret_");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "caret_blink"), "cursor_set_blink_enabled", "cursor_get_blink_enabled");
-	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "caret_blink_speed", PROPERTY_HINT_RANGE, "0.1,10,0.01"), "cursor_set_blink_speed", "cursor_get_blink_speed");
-	ADD_PROPERTY(PropertyInfo(Variant::INT, "caret_position"), "set_cursor_position", "get_cursor_position");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "caret_force_displayed"), "cursor_set_force_displayed", "cursor_get_force_displayed");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "caret_mid_grapheme"), "set_mid_grapheme_caret_enabled", "get_mid_grapheme_caret_enabled");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "caret_blink"), "set_caret_blink_enabled", "is_caret_blink_enabled");
+	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "caret_blink_speed", PROPERTY_HINT_RANGE, "0.1,10,0.01"), "set_caret_blink_speed", "get_caret_blink_speed");
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "caret_column"), "set_caret_column", "get_caret_column");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "caret_force_displayed"), "set_caret_force_displayed", "is_caret_force_displayed");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "caret_mid_grapheme"), "set_caret_mid_grapheme_enabled", "is_caret_mid_grapheme_enabled");
 }
 
 LineEdit::LineEdit() {
@@ -2236,7 +2237,7 @@ LineEdit::LineEdit() {
 	add_child(caret_blink_timer);
 	caret_blink_timer->set_wait_time(0.65);
 	caret_blink_timer->connect("timeout", callable_mp(this, &LineEdit::_toggle_draw_caret));
-	cursor_set_blink_enabled(false);
+	set_caret_blink_enabled(false);
 
 	menu = memnew(PopupMenu);
 	add_child(menu);
diff --git a/scene/gui/line_edit.h b/scene/gui/line_edit.h
index ef36377f2e..f4f0ff0629 100644
--- a/scene/gui/line_edit.h
+++ b/scene/gui/line_edit.h
@@ -103,9 +103,9 @@ private:
 	PopupMenu *menu_dir = nullptr;
 	PopupMenu *menu_ctl = nullptr;
 
-	bool mid_grapheme_caret_enabled = false;
+	bool caret_mid_grapheme_enabled = false;
 
-	int cursor_pos = 0;
+	int caret_column = 0;
 	int scroll_offset = 0;
 	int max_length = 0; // 0 for no maximum.
 
@@ -131,7 +131,7 @@ private:
 	struct Selection {
 		int begin = 0;
 		int end = 0;
-		int cursor_start = 0;
+		int start_column = 0;
 		bool enabled = false;
 		bool creating = false;
 		bool doubleclick = false;
@@ -140,7 +140,7 @@ private:
 	} selection;
 
 	struct TextOperation {
-		int cursor_pos = 0;
+		int caret_column = 0;
 		int scroll_offset = 0;
 		int cached_width = 0;
 		String text;
@@ -175,12 +175,12 @@ private:
 	void shift_selection_check_pre(bool);
 	void shift_selection_check_post(bool);
 
-	void selection_fill_at_cursor();
+	void selection_fill_at_caret();
 	void set_scroll_offset(int p_pos);
 	int get_scroll_offset() const;
 
-	void set_cursor_at_pixel_pos(int p_x);
-	Vector2i get_cursor_pixel_pos();
+	void set_caret_at_pixel_pos(int p_x);
+	Vector2i get_caret_pixel_pos();
 
 	void _reset_caret_blink_timer();
 	void _toggle_draw_caret();
@@ -191,10 +191,10 @@ private:
 	void _editor_settings_changed();
 
 	void _swap_current_input_direction();
-	void _move_cursor_left(bool p_select, bool p_move_by_word = false);
-	void _move_cursor_right(bool p_select, bool p_move_by_word = false);
-	void _move_cursor_start(bool p_select);
-	void _move_cursor_end(bool p_select);
+	void _move_caret_left(bool p_select, bool p_move_by_word = false);
+	void _move_caret_right(bool p_select, bool p_move_by_word = false);
+	void _move_caret_start(bool p_select);
+	void _move_caret_end(bool p_select);
 	void _backspace(bool p_word = false, bool p_all_to_left = false);
 	void _delete(bool p_word = false, bool p_all_to_right = false);
 
@@ -259,26 +259,26 @@ public:
 	void set_placeholder_alpha(float p_alpha);
 	float get_placeholder_alpha() const;
 
-	void set_cursor_position(int p_pos);
-	int get_cursor_position() const;
+	void set_caret_column(int p_column);
+	int get_caret_column() const;
 
 	void set_max_length(int p_max_length);
 	int get_max_length() const;
 
-	void append_at_cursor(String p_text);
+	void insert_text_at_caret(String p_text);
 	void clear();
 
-	void set_mid_grapheme_caret_enabled(const bool p_enabled);
-	bool get_mid_grapheme_caret_enabled() const;
+	void set_caret_mid_grapheme_enabled(const bool p_enabled);
+	bool is_caret_mid_grapheme_enabled() const;
 
-	bool cursor_get_blink_enabled() const;
-	void cursor_set_blink_enabled(const bool p_enabled);
+	bool is_caret_blink_enabled() const;
+	void set_caret_blink_enabled(const bool p_enabled);
 
-	float cursor_get_blink_speed() const;
-	void cursor_set_blink_speed(const float p_speed);
+	float get_caret_blink_speed() const;
+	void set_caret_blink_speed(const float p_speed);
 
-	bool cursor_get_force_displayed() const;
-	void cursor_set_force_displayed(const bool p_enabled);
+	void set_caret_force_displayed(const bool p_enabled);
+	bool is_caret_force_displayed() const;
 
 	void copy_text();
 	void cut_text();
@@ -297,8 +297,8 @@ public:
 
 	virtual Size2 get_minimum_size() const override;
 
-	void set_expand_to_text_length(bool p_enabled);
-	bool get_expand_to_text_length() const;
+	void set_expand_to_text_length_enabled(bool p_enabled);
+	bool is_expand_to_text_length_enabled() const;
 
 	void set_clear_button_enabled(bool p_enabled);
 	bool is_clear_button_enabled() const;
diff --git a/scene/gui/text_edit.cpp b/scene/gui/text_edit.cpp
index f54ab004c6..4f508423b3 100644
--- a/scene/gui/text_edit.cpp
+++ b/scene/gui/text_edit.cpp
@@ -1973,7 +1973,7 @@ void TextEdit::backspace_at_cursor() {
 		}
 	}
 
-	cursor_set_line(prev_line, true, true);
+	cursor_set_line(prev_line, false, true);
 	cursor_set_column(prev_column);
 }
 
@@ -2207,7 +2207,7 @@ void TextEdit::_new_line(bool p_split_current_line, bool p_above) {
 	if (!p_split_current_line) {
 		if (p_above) {
 			if (cursor.line > 0) {
-				cursor_set_line(cursor.line - 1);
+				cursor_set_line(cursor.line - 1, false);
 				cursor_set_column(text[cursor.line].length());
 			} else {
 				cursor_set_column(0);
@@ -2223,7 +2223,7 @@ void TextEdit::_new_line(bool p_split_current_line, bool p_above) {
 	if (first_line) {
 		cursor_set_line(0);
 	} else if (brace_indent) {
-		cursor_set_line(cursor.line - 1);
+		cursor_set_line(cursor.line - 1, false);
 		cursor_set_column(text[cursor.line].length());
 	}
 	end_complex_operation();
@@ -2573,7 +2573,7 @@ void TextEdit::_backspace(bool p_word, bool p_all_to_left) {
 
 		_remove_text(line, column, cursor.line, cursor.column);
 
-		cursor_set_line(line);
+		cursor_set_line(line, false);
 		cursor_set_column(column);
 	} else {
 		// One character.
@@ -2640,7 +2640,7 @@ void TextEdit::_delete_selection() {
 		selection.active = false;
 		update();
 		_remove_text(selection.from_line, selection.from_column, selection.to_line, selection.to_column);
-		cursor_set_line(selection.from_line, true, false);
+		cursor_set_line(selection.from_line, false, false);
 		cursor_set_column(selection.from_column);
 		update();
 	}
@@ -3261,7 +3261,7 @@ void TextEdit::_gui_input(const Ref<InputEvent> &p_gui_input) {
 				accept_event();
 				return;
 			}
-			if (k->is_action("ui_accept", true) || k->is_action("ui_text_completion_accept", true)) {
+			if (k->is_action("ui_text_completion_accept", true)) {
 				_confirm_completion();
 				accept_event();
 				return;
@@ -3851,7 +3851,7 @@ void TextEdit::_insert_text_at_cursor(const String &p_text) {
 	int new_column, new_line;
 	_insert_text(cursor.line, cursor.column, p_text, &new_line, &new_column);
 	_update_scrollbars();
-	cursor_set_line(new_line);
+	cursor_set_line(new_line, false);
 	cursor_set_column(new_column);
 
 	update();
@@ -4425,7 +4425,7 @@ int TextEdit::get_column_x_offset_for_line(int p_char, int p_line) const {
 
 void TextEdit::insert_text_at_cursor(const String &p_text) {
 	if (selection.active) {
-		cursor_set_line(selection.from_line);
+		cursor_set_line(selection.from_line, false);
 		cursor_set_column(selection.from_column);
 
 		_remove_text(selection.from_line, selection.from_column, selection.to_line, selection.to_column);
@@ -5042,7 +5042,7 @@ void TextEdit::cut() {
 		DisplayServer::get_singleton()->clipboard_set(clipboard);
 
 		_remove_text(selection.from_line, selection.from_column, selection.to_line, selection.to_column);
-		cursor_set_line(selection.from_line); // Set afterwards else it causes the view to be offset.
+		cursor_set_line(selection.from_line, false); // Set afterwards else it causes the view to be offset.
 		cursor_set_column(selection.from_column);
 
 		selection.active = false;
@@ -5078,7 +5078,7 @@ void TextEdit::paste() {
 		selection.active = false;
 		selection.selecting_mode = SelectionMode::SELECTION_MODE_NONE;
 		_remove_text(selection.from_line, selection.from_column, selection.to_line, selection.to_column);
-		cursor_set_line(selection.from_line);
+		cursor_set_line(selection.from_line, false);
 		cursor_set_column(selection.from_column);
 
 	} else if (!cut_copy_line.is_empty() && cut_copy_line == clipboard) {
@@ -5817,11 +5817,11 @@ void TextEdit::undo() {
 
 	_update_scrollbars();
 	if (undo_stack_pos->get().type == TextOperation::TYPE_REMOVE) {
-		cursor_set_line(undo_stack_pos->get().to_line);
+		cursor_set_line(undo_stack_pos->get().to_line, false);
 		cursor_set_column(undo_stack_pos->get().to_column);
 		_cancel_code_hint();
 	} else {
-		cursor_set_line(undo_stack_pos->get().from_line);
+		cursor_set_line(undo_stack_pos->get().from_line, false);
 		cursor_set_column(undo_stack_pos->get().from_column);
 	}
 	update();
@@ -5856,7 +5856,7 @@ void TextEdit::redo() {
 	}
 
 	_update_scrollbars();
-	cursor_set_line(undo_stack_pos->get().to_line);
+	cursor_set_line(undo_stack_pos->get().to_line, false);
 	cursor_set_column(undo_stack_pos->get().to_column);
 	undo_stack_pos = undo_stack_pos->next();
 	update();
diff --git a/scene/resources/default_theme/default_theme.cpp b/scene/resources/default_theme/default_theme.cpp
index f05b43377f..b9f4a7a741 100644
--- a/scene/resources/default_theme/default_theme.cpp
+++ b/scene/resources/default_theme/default_theme.cpp
@@ -438,7 +438,7 @@ void fill_default_theme(Ref<Theme> &theme, const Ref<Font> &default_font, const
 	theme->set_color("font_selected_color", "LineEdit", Color(0, 0, 0));
 	theme->set_color("font_uneditable_color", "LineEdit", Color(control_font_color.r, control_font_color.g, control_font_color.b, 0.5f));
 	theme->set_color("font_outline_color", "LineEdit", Color(1, 1, 1));
-	theme->set_color("cursor_color", "LineEdit", control_font_hover_color);
+	theme->set_color("caret_color", "LineEdit", control_font_hover_color);
 	theme->set_color("selection_color", "LineEdit", control_selection_color);
 	theme->set_color("clear_button_color", "LineEdit", control_font_color);
 	theme->set_color("clear_button_color_pressed", "LineEdit", control_font_pressed_color);
diff --git a/scene/resources/height_map_shape_3d.cpp b/scene/resources/height_map_shape_3d.cpp
index 5593bb766f..de5da944bc 100644
--- a/scene/resources/height_map_shape_3d.cpp
+++ b/scene/resources/height_map_shape_3d.cpp
@@ -41,10 +41,10 @@ Vector<Vector3> HeightMapShape3D::get_debug_mesh_lines() const {
 		Vector2 size(map_width - 1, map_depth - 1);
 		Vector2 start = size * -0.5;
 
-		const real_t *r = map_data.ptr();
+		const float *r = map_data.ptr();
 
 		// reserve some memory for our points..
-		points.resize(((map_width - 1) * map_depth * 2) + (map_width * (map_depth - 1) * 2));
+		points.resize(((map_width - 1) * map_depth * 2) + (map_width * (map_depth - 1) * 2) + ((map_width - 1) * (map_depth - 1) * 2));
 
 		// now set our points
 		int r_offset = 0;
@@ -65,6 +65,11 @@ Vector<Vector3> HeightMapShape3D::get_debug_mesh_lines() const {
 					points.write[w_offset++] = Vector3(height.x, r[r_offset + map_width - 1], height.z + 1.0);
 				}
 
+				if ((w != map_width - 1) && (d != map_depth - 1)) {
+					points.write[w_offset++] = Vector3(height.x + 1.0, r[r_offset], height.z);
+					points.write[w_offset++] = Vector3(height.x, r[r_offset + map_width - 1], height.z + 1.0);
+				}
+
 				height.x += 1.0;
 			}
 
@@ -100,7 +105,7 @@ void HeightMapShape3D::set_map_width(int p_new) {
 		int new_size = map_width * map_depth;
 		map_data.resize(map_width * map_depth);
 
-		real_t *w = map_data.ptrw();
+		float *w = map_data.ptrw();
 		while (was_size < new_size) {
 			w[was_size++] = 0.0;
 		}
@@ -124,7 +129,7 @@ void HeightMapShape3D::set_map_depth(int p_new) {
 		int new_size = map_width * map_depth;
 		map_data.resize(new_size);
 
-		real_t *w = map_data.ptrw();
+		float *w = map_data.ptrw();
 		while (was_size < new_size) {
 			w[was_size++] = 0.0;
 		}
@@ -146,8 +151,8 @@ void HeightMapShape3D::set_map_data(PackedFloat32Array p_new) {
 	}
 
 	// copy
-	real_t *w = map_data.ptrw();
-	const real_t *r = p_new.ptr();
+	float *w = map_data.ptrw();
+	const float *r = p_new.ptr();
 	for (int i = 0; i < size; i++) {
 		float val = r[i];
 		w[i] = val;
@@ -189,7 +194,7 @@ void HeightMapShape3D::_bind_methods() {
 HeightMapShape3D::HeightMapShape3D() :
 		Shape3D(PhysicsServer3D::get_singleton()->shape_create(PhysicsServer3D::SHAPE_HEIGHTMAP)) {
 	map_data.resize(map_width * map_depth);
-	real_t *w = map_data.ptrw();
+	float *w = map_data.ptrw();
 	w[0] = 0.0;
 	w[1] = 0.0;
 	w[2] = 0.0;
diff --git a/scene/resources/height_map_shape_3d.h b/scene/resources/height_map_shape_3d.h
index 6fc88cff90..1219791c56 100644
--- a/scene/resources/height_map_shape_3d.h
+++ b/scene/resources/height_map_shape_3d.h
@@ -39,8 +39,8 @@ class HeightMapShape3D : public Shape3D {
 	int map_width = 2;
 	int map_depth = 2;
 	PackedFloat32Array map_data;
-	float min_height = 0.0;
-	float max_height = 0.0;
+	real_t min_height = 0.0;
+	real_t max_height = 0.0;
 
 protected:
 	static void _bind_methods();
diff --git a/scene/resources/particles_material.cpp b/scene/resources/particles_material.cpp
index 195ce070a7..bb47eebe9b 100644
--- a/scene/resources/particles_material.cpp
+++ b/scene/resources/particles_material.cpp
@@ -289,7 +289,7 @@ void ParticlesMaterial::_update_shader() {
 	code += "}\n";
 	code += "\n";
 
-	code += "void compute() {\n";
+	code += "void start() {\n";
 	code += "	uint base_number = NUMBER;\n";
 	code += "	uint alt_seed = hash(base_number + uint(1) + RANDOM_SEED);\n";
 	code += "	float angle_rand = rand_from_seed(alt_seed);\n";
@@ -305,97 +305,94 @@ void ParticlesMaterial::_update_shader() {
 		code += "	ivec2 emission_tex_size = textureSize(emission_texture_points, 0);\n";
 		code += "	ivec2 emission_tex_ofs = ivec2(point % emission_tex_size.x, point / emission_tex_size.x);\n";
 	}
-	code += "	float tv = 0.0;\n";
-	code += "	if (RESTART) {\n";
-
 	if (tex_parameters[PARAM_ANGLE].is_valid()) {
-		code += "		float tex_angle = textureLod(angle_texture, vec2(0.0, 0.0), 0.0).r;\n";
+		code += "	float tex_angle = textureLod(angle_texture, vec2(0.0, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_angle = 0.0;\n";
+		code += "	float tex_angle = 0.0;\n";
 	}
 
 	if (tex_parameters[PARAM_ANIM_OFFSET].is_valid()) {
-		code += "		float tex_anim_offset = textureLod(anim_offset_texture, vec2(0.0, 0.0), 0.0).r;\n";
+		code += "	float tex_anim_offset = textureLod(anim_offset_texture, vec2(0.0, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_anim_offset = 0.0;\n";
+		code += "	float tex_anim_offset = 0.0;\n";
 	}
 
-	code += "		float spread_rad = spread * degree_to_rad;\n";
+	code += "	float spread_rad = spread * degree_to_rad;\n";
 
-	code += "		if (RESTART_VELOCITY) {\n";
+	code += "	if (RESTART_VELOCITY) {\n";
 
 	if (tex_parameters[PARAM_INITIAL_LINEAR_VELOCITY].is_valid()) {
-		code += "			float tex_linear_velocity = textureLod(linear_velocity_texture, vec2(0.0, 0.0), 0.0).r;\n";
+		code += "		float tex_linear_velocity = textureLod(linear_velocity_texture, vec2(0.0, 0.0), 0.0).r;\n";
 	} else {
-		code += "			float tex_linear_velocity = 0.0;\n";
+		code += "		float tex_linear_velocity = 0.0;\n";
 	}
 
 	if (particle_flags[PARTICLE_FLAG_DISABLE_Z]) {
-		code += "			float angle1_rad = rand_from_seed_m1_p1(alt_seed) * spread_rad;\n";
-		code += "			angle1_rad += direction.x != 0.0 ? atan(direction.y, direction.x) : sign(direction.y) * (pi / 2.0);\n";
-		code += "			vec3 rot = vec3(cos(angle1_rad), sin(angle1_rad), 0.0);\n";
-		code += "			VELOCITY = rot * initial_linear_velocity * mix(1.0, rand_from_seed(alt_seed), initial_linear_velocity_random);\n";
+		code += "		float angle1_rad = rand_from_seed_m1_p1(alt_seed) * spread_rad;\n";
+		code += "		angle1_rad += direction.x != 0.0 ? atan(direction.y, direction.x) : sign(direction.y) * (pi / 2.0);\n";
+		code += "		vec3 rot = vec3(cos(angle1_rad), sin(angle1_rad), 0.0);\n";
+		code += "		VELOCITY = rot * initial_linear_velocity * mix(1.0, rand_from_seed(alt_seed), initial_linear_velocity_random);\n";
 
 	} else {
 		//initiate velocity spread in 3D
-		code += "			float angle1_rad = rand_from_seed_m1_p1(alt_seed) * spread_rad;\n";
-		code += "			float angle2_rad = rand_from_seed_m1_p1(alt_seed) * spread_rad * (1.0 - flatness);\n";
-		code += "			vec3 direction_xz = vec3(sin(angle1_rad), 0.0, cos(angle1_rad));\n";
-		code += "			vec3 direction_yz = vec3(0.0, sin(angle2_rad), cos(angle2_rad));\n";
-		code += "			direction_yz.z = direction_yz.z / max(0.0001,sqrt(abs(direction_yz.z))); // better uniform distribution\n";
-		code += "			vec3 spread_direction = vec3(direction_xz.x * direction_yz.z, direction_yz.y, direction_xz.z * direction_yz.z);\n";
-		code += "			vec3 direction_nrm = normalize(direction);\n";
-		code += "			// rotate spread to direction\n";
-		code += "			vec3 binormal = cross(vec3(0.0, 1.0, 0.0), direction_nrm);\n";
-		code += "			if (length(binormal) < 0.0001) {\n";
-		code += "				// direction is parallel to Y. Choose Z as the binormal.\n";
-		code += "				binormal = vec3(0.0, 0.0, 1.0);\n";
-		code += "			}\n";
-		code += "			binormal = normalize(binormal);\n";
-		code += "			vec3 normal = cross(binormal, direction_nrm);\n";
-		code += "			spread_direction = binormal * spread_direction.x + normal * spread_direction.y + direction_nrm * spread_direction.z;\n";
-		code += "			VELOCITY = spread_direction * initial_linear_velocity * mix(1.0, rand_from_seed(alt_seed), initial_linear_velocity_random);\n";
+		code += "		float angle1_rad = rand_from_seed_m1_p1(alt_seed) * spread_rad;\n";
+		code += "		float angle2_rad = rand_from_seed_m1_p1(alt_seed) * spread_rad * (1.0 - flatness);\n";
+		code += "		vec3 direction_xz = vec3(sin(angle1_rad), 0.0, cos(angle1_rad));\n";
+		code += "		vec3 direction_yz = vec3(0.0, sin(angle2_rad), cos(angle2_rad));\n";
+		code += "		direction_yz.z = direction_yz.z / max(0.0001,sqrt(abs(direction_yz.z))); // better uniform distribution\n";
+		code += "		vec3 spread_direction = vec3(direction_xz.x * direction_yz.z, direction_yz.y, direction_xz.z * direction_yz.z);\n";
+		code += "		vec3 direction_nrm = normalize(direction);\n";
+		code += "		// rotate spread to direction\n";
+		code += "		vec3 binormal = cross(vec3(0.0, 1.0, 0.0), direction_nrm);\n";
+		code += "		if (length(binormal) < 0.0001) {\n";
+		code += "			// direction is parallel to Y. Choose Z as the binormal.\n";
+		code += "			binormal = vec3(0.0, 0.0, 1.0);\n";
+		code += "		}\n";
+		code += "		binormal = normalize(binormal);\n";
+		code += "		vec3 normal = cross(binormal, direction_nrm);\n";
+		code += "		spread_direction = binormal * spread_direction.x + normal * spread_direction.y + direction_nrm * spread_direction.z;\n";
+		code += "		VELOCITY = spread_direction * initial_linear_velocity * mix(1.0, rand_from_seed(alt_seed), initial_linear_velocity_random);\n";
 	}
-	code += "		}\n";
+	code += "	}\n";
 
-	code += "		float base_angle = (initial_angle + tex_angle) * mix(1.0, angle_rand, initial_angle_random);\n";
-	code += "		CUSTOM.x = base_angle * degree_to_rad;\n"; // angle
-	code += "		CUSTOM.y = 0.0;\n"; // phase
-	code += "		CUSTOM.w = (1.0 - lifetime_randomness * rand_from_seed(alt_seed));\n";
-	code += "		CUSTOM.z = (anim_offset + tex_anim_offset) * mix(1.0, anim_offset_rand, anim_offset_random);\n"; // animation offset (0-1)
+	code += "	float base_angle = (initial_angle + tex_angle) * mix(1.0, angle_rand, initial_angle_random);\n";
+	code += "	CUSTOM.x = base_angle * degree_to_rad;\n"; // angle
+	code += "	CUSTOM.y = 0.0;\n"; // phase
+	code += "	CUSTOM.w = (1.0 - lifetime_randomness * rand_from_seed(alt_seed));\n";
+	code += "	CUSTOM.z = (anim_offset + tex_anim_offset) * mix(1.0, anim_offset_rand, anim_offset_random);\n"; // animation offset (0-1)
 
-	code += "		if (RESTART_POSITION) {\n";
+	code += "	if (RESTART_POSITION) {\n";
 
 	switch (emission_shape) {
 		case EMISSION_SHAPE_POINT: {
 			//do none, identity (will later be multiplied by emission transform)
-			code += "			TRANSFORM = mat4(vec4(1,0,0,0),vec4(0,1,0,0),vec4(0,0,1,0),vec4(0,0,0,1));\n";
+			code += "		TRANSFORM = mat4(vec4(1,0,0,0),vec4(0,1,0,0),vec4(0,0,1,0),vec4(0,0,0,1));\n";
 		} break;
 		case EMISSION_SHAPE_SPHERE: {
-			code += "			float s = rand_from_seed(alt_seed) * 2.0 - 1.0;\n";
-			code += "			float t = rand_from_seed(alt_seed) * 2.0 * pi;\n";
-			code += "			float radius = emission_sphere_radius * sqrt(1.0 - s * s);\n";
-			code += "			TRANSFORM[3].xyz = vec3(radius * cos(t), radius * sin(t), emission_sphere_radius * s);\n";
+			code += "		float s = rand_from_seed(alt_seed) * 2.0 - 1.0;\n";
+			code += "		float t = rand_from_seed(alt_seed) * 2.0 * pi;\n";
+			code += "		float radius = emission_sphere_radius * sqrt(1.0 - s * s);\n";
+			code += "		TRANSFORM[3].xyz = vec3(radius * cos(t), radius * sin(t), emission_sphere_radius * s);\n";
 		} break;
 		case EMISSION_SHAPE_BOX: {
-			code += "			TRANSFORM[3].xyz = vec3(rand_from_seed(alt_seed) * 2.0 - 1.0, rand_from_seed(alt_seed) * 2.0 - 1.0, rand_from_seed(alt_seed) * 2.0 - 1.0) * emission_box_extents;\n";
+			code += "		TRANSFORM[3].xyz = vec3(rand_from_seed(alt_seed) * 2.0 - 1.0, rand_from_seed(alt_seed) * 2.0 - 1.0, rand_from_seed(alt_seed) * 2.0 - 1.0) * emission_box_extents;\n";
 		} break;
 		case EMISSION_SHAPE_POINTS:
 		case EMISSION_SHAPE_DIRECTED_POINTS: {
-			code += "			TRANSFORM[3].xyz = texelFetch(emission_texture_points, emission_tex_ofs, 0).xyz;\n";
+			code += "		TRANSFORM[3].xyz = texelFetch(emission_texture_points, emission_tex_ofs, 0).xyz;\n";
 
 			if (emission_shape == EMISSION_SHAPE_DIRECTED_POINTS) {
 				if (particle_flags[PARTICLE_FLAG_DISABLE_Z]) {
-					code += "			mat2 rotm;";
-					code += "			rotm[0] = texelFetch(emission_texture_normal, emission_tex_ofs, 0).xy;\n";
-					code += "			rotm[1] = rotm[0].yx * vec2(1.0, -1.0);\n";
-					code += "			if (RESTART_VELOCITY) VELOCITY.xy = rotm * VELOCITY.xy;\n";
+					code += "		mat2 rotm;";
+					code += "		rotm[0] = texelFetch(emission_texture_normal, emission_tex_ofs, 0).xy;\n";
+					code += "		rotm[1] = rotm[0].yx * vec2(1.0, -1.0);\n";
+					code += "		if (RESTART_VELOCITY) VELOCITY.xy = rotm * VELOCITY.xy;\n";
 				} else {
-					code += "			vec3 normal = texelFetch(emission_texture_normal, emission_tex_ofs, 0).xyz;\n";
-					code += "			vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);\n";
-					code += "			vec3 tangent = normalize(cross(v0, normal));\n";
-					code += "			vec3 bitangent = normalize(cross(tangent, normal));\n";
-					code += "			if (RESTART_VELOCITY) VELOCITY = mat3(tangent, bitangent, normal) * VELOCITY;\n";
+					code += "		vec3 normal = texelFetch(emission_texture_normal, emission_tex_ofs, 0).xyz;\n";
+					code += "		vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);\n";
+					code += "		vec3 tangent = normalize(cross(v0, normal));\n";
+					code += "		vec3 bitangent = normalize(cross(tangent, normal));\n";
+					code += "		if (RESTART_VELOCITY) VELOCITY = mat3(tangent, bitangent, normal) * VELOCITY;\n";
 				}
 			}
 		} break;
@@ -404,134 +401,144 @@ void ParticlesMaterial::_update_shader() {
 		}
 	}
 
-	code += "			if (RESTART_VELOCITY) VELOCITY = (EMISSION_TRANSFORM * vec4(VELOCITY, 0.0)).xyz;\n";
-	code += "			TRANSFORM = EMISSION_TRANSFORM * TRANSFORM;\n";
+	code += "	if (RESTART_VELOCITY) VELOCITY = (EMISSION_TRANSFORM * vec4(VELOCITY, 0.0)).xyz;\n";
+	code += "	TRANSFORM = EMISSION_TRANSFORM * TRANSFORM;\n";
 	if (particle_flags[PARTICLE_FLAG_DISABLE_Z]) {
-		code += "			VELOCITY.z = 0.0;\n";
-		code += "			TRANSFORM[3].z = 0.0;\n";
+		code += "	VELOCITY.z = 0.0;\n";
+		code += "	TRANSFORM[3].z = 0.0;\n";
 	}
-	code += "		}\n";
+	code += "	}\n";
+	code += "}\n\n";
 
-	code += "	} else {\n";
+	code += "void process() {\n";
+	code += "	uint base_number = NUMBER;\n";
+	code += "	uint alt_seed = hash(base_number + uint(1) + RANDOM_SEED);\n";
+	code += "	float angle_rand = rand_from_seed(alt_seed);\n";
+	code += "	float scale_rand = rand_from_seed(alt_seed);\n";
+	code += "	float hue_rot_rand = rand_from_seed(alt_seed);\n";
+	code += "	float anim_offset_rand = rand_from_seed(alt_seed);\n";
+	code += "	float pi = 3.14159;\n";
+	code += "	float degree_to_rad = pi / 180.0;\n";
+	code += "\n";
 
-	code += "		CUSTOM.y += DELTA / LIFETIME;\n";
-	code += "		tv = CUSTOM.y / CUSTOM.w;\n";
+	code += "	CUSTOM.y += DELTA / LIFETIME;\n";
+	code += "	float tv = CUSTOM.y / CUSTOM.w;\n";
 	if (tex_parameters[PARAM_INITIAL_LINEAR_VELOCITY].is_valid()) {
-		code += "		float tex_linear_velocity = textureLod(linear_velocity_texture, vec2(tv, 0.0), 0.0).r;\n";
+		code += "	float tex_linear_velocity = textureLod(linear_velocity_texture, vec2(tv, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_linear_velocity = 0.0;\n";
+		code += "	float tex_linear_velocity = 0.0;\n";
 	}
 
 	if (particle_flags[PARTICLE_FLAG_DISABLE_Z]) {
 		if (tex_parameters[PARAM_ORBIT_VELOCITY].is_valid()) {
-			code += "		float tex_orbit_velocity = textureLod(orbit_velocity_texture, vec2(tv, 0.0), 0.0).r;\n";
+			code += "	float tex_orbit_velocity = textureLod(orbit_velocity_texture, vec2(tv, 0.0), 0.0).r;\n";
 		} else {
-			code += "		float tex_orbit_velocity = 0.0;\n";
+			code += "	float tex_orbit_velocity = 0.0;\n";
 		}
 	}
 
 	if (tex_parameters[PARAM_ANGULAR_VELOCITY].is_valid()) {
-		code += "		float tex_angular_velocity = textureLod(angular_velocity_texture, vec2(tv, 0.0), 0.0).r;\n";
+		code += "	float tex_angular_velocity = textureLod(angular_velocity_texture, vec2(tv, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_angular_velocity = 0.0;\n";
+		code += "	float tex_angular_velocity = 0.0;\n";
 	}
 
 	if (tex_parameters[PARAM_LINEAR_ACCEL].is_valid()) {
-		code += "		float tex_linear_accel = textureLod(linear_accel_texture, vec2(tv, 0.0), 0.0).r;\n";
+		code += "	float tex_linear_accel = textureLod(linear_accel_texture, vec2(tv, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_linear_accel = 0.0;\n";
+		code += "	float tex_linear_accel = 0.0;\n";
 	}
 
 	if (tex_parameters[PARAM_RADIAL_ACCEL].is_valid()) {
-		code += "		float tex_radial_accel = textureLod(radial_accel_texture, vec2(tv, 0.0), 0.0).r;\n";
+		code += "	float tex_radial_accel = textureLod(radial_accel_texture, vec2(tv, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_radial_accel = 0.0;\n";
+		code += "	float tex_radial_accel = 0.0;\n";
 	}
 
 	if (tex_parameters[PARAM_TANGENTIAL_ACCEL].is_valid()) {
-		code += "		float tex_tangent_accel = textureLod(tangent_accel_texture, vec2(tv, 0.0), 0.0).r;\n";
+		code += "	float tex_tangent_accel = textureLod(tangent_accel_texture, vec2(tv, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_tangent_accel = 0.0;\n";
+		code += "	float tex_tangent_accel = 0.0;\n";
 	}
 
 	if (tex_parameters[PARAM_DAMPING].is_valid()) {
-		code += "		float tex_damping = textureLod(damping_texture, vec2(tv, 0.0), 0.0).r;\n";
+		code += "	float tex_damping = textureLod(damping_texture, vec2(tv, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_damping = 0.0;\n";
+		code += "	float tex_damping = 0.0;\n";
 	}
 
 	if (tex_parameters[PARAM_ANGLE].is_valid()) {
-		code += "		float tex_angle = textureLod(angle_texture, vec2(tv, 0.0), 0.0).r;\n";
+		code += "	float tex_angle = textureLod(angle_texture, vec2(tv, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_angle = 0.0;\n";
+		code += "	float tex_angle = 0.0;\n";
 	}
 
 	if (tex_parameters[PARAM_ANIM_SPEED].is_valid()) {
-		code += "		float tex_anim_speed = textureLod(anim_speed_texture, vec2(tv, 0.0), 0.0).r;\n";
+		code += "	float tex_anim_speed = textureLod(anim_speed_texture, vec2(tv, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_anim_speed = 0.0;\n";
+		code += "	float tex_anim_speed = 0.0;\n";
 	}
 
 	if (tex_parameters[PARAM_ANIM_OFFSET].is_valid()) {
-		code += "		float tex_anim_offset = textureLod(anim_offset_texture, vec2(tv, 0.0), 0.0).r;\n";
+		code += "	float tex_anim_offset = textureLod(anim_offset_texture, vec2(tv, 0.0), 0.0).r;\n";
 	} else {
-		code += "		float tex_anim_offset = 0.0;\n";
+		code += "	float tex_anim_offset = 0.0;\n";
 	}
 
-	code += "		vec3 force = gravity;\n";
-	code += "		vec3 pos = TRANSFORM[3].xyz;\n";
+	code += "	vec3 force = gravity;\n";
+	code += "	vec3 pos = TRANSFORM[3].xyz;\n";
 	if (particle_flags[PARTICLE_FLAG_DISABLE_Z]) {
-		code += "		pos.z = 0.0;\n";
-	}
-	code += "		// apply linear acceleration\n";
-	code += "		force += length(VELOCITY) > 0.0 ? normalize(VELOCITY) * (linear_accel + tex_linear_accel) * mix(1.0, rand_from_seed(alt_seed), linear_accel_random) : vec3(0.0);\n";
-	code += "		// apply radial acceleration\n";
-	code += "		vec3 org = EMISSION_TRANSFORM[3].xyz;\n";
-	code += "		vec3 diff = pos - org;\n";
-	code += "		force += length(diff) > 0.0 ? normalize(diff) * (radial_accel + tex_radial_accel) * mix(1.0, rand_from_seed(alt_seed), radial_accel_random) : vec3(0.0);\n";
-	code += "		// apply tangential acceleration;\n";
+		code += "	pos.z = 0.0;\n";
+	}
+	code += "	// apply linear acceleration\n";
+	code += "	force += length(VELOCITY) > 0.0 ? normalize(VELOCITY) * (linear_accel + tex_linear_accel) * mix(1.0, rand_from_seed(alt_seed), linear_accel_random) : vec3(0.0);\n";
+	code += "	// apply radial acceleration\n";
+	code += "	vec3 org = EMISSION_TRANSFORM[3].xyz;\n";
+	code += "	vec3 diff = pos - org;\n";
+	code += "	force += length(diff) > 0.0 ? normalize(diff) * (radial_accel + tex_radial_accel) * mix(1.0, rand_from_seed(alt_seed), radial_accel_random) : vec3(0.0);\n";
+	code += "	// apply tangential acceleration;\n";
 	if (particle_flags[PARTICLE_FLAG_DISABLE_Z]) {
-		code += "		force += length(diff.yx) > 0.0 ? vec3(normalize(diff.yx * vec2(-1.0, 1.0)), 0.0) * ((tangent_accel + tex_tangent_accel) * mix(1.0, rand_from_seed(alt_seed), tangent_accel_random)) : vec3(0.0);\n";
+		code += "	force += length(diff.yx) > 0.0 ? vec3(normalize(diff.yx * vec2(-1.0, 1.0)), 0.0) * ((tangent_accel + tex_tangent_accel) * mix(1.0, rand_from_seed(alt_seed), tangent_accel_random)) : vec3(0.0);\n";
 
 	} else {
-		code += "		vec3 crossDiff = cross(normalize(diff), normalize(gravity));\n";
-		code += "		force += length(crossDiff) > 0.0 ? normalize(crossDiff) * ((tangent_accel + tex_tangent_accel) * mix(1.0, rand_from_seed(alt_seed), tangent_accel_random)) : vec3(0.0);\n";
+		code += "	vec3 crossDiff = cross(normalize(diff), normalize(gravity));\n";
+		code += "	force += length(crossDiff) > 0.0 ? normalize(crossDiff) * ((tangent_accel + tex_tangent_accel) * mix(1.0, rand_from_seed(alt_seed), tangent_accel_random)) : vec3(0.0);\n";
 	}
 	if (attractor_interaction_enabled) {
-		code += "		force += ATTRACTOR_FORCE;\n\n";
+		code += "	force += ATTRACTOR_FORCE;\n\n";
 	}
 
-	code += "		// apply attractor forces\n";
-	code += "		VELOCITY += force * DELTA;\n";
-	code += "		// orbit velocity\n";
+	code += "	// apply attractor forces\n";
+	code += "	VELOCITY += force * DELTA;\n";
+	code += "	// orbit velocity\n";
 	if (particle_flags[PARTICLE_FLAG_DISABLE_Z]) {
-		code += "		float orbit_amount = (orbit_velocity + tex_orbit_velocity) * mix(1.0, rand_from_seed(alt_seed), orbit_velocity_random);\n";
-		code += "		if (orbit_amount != 0.0) {\n";
-		code += "		     float ang = orbit_amount * DELTA * pi * 2.0;\n";
-		code += "		     mat2 rot = mat2(vec2(cos(ang), -sin(ang)), vec2(sin(ang), cos(ang)));\n";
-		code += "		     TRANSFORM[3].xy -= diff.xy;\n";
-		code += "		     TRANSFORM[3].xy += rot * diff.xy;\n";
-		code += "		}\n";
+		code += "	float orbit_amount = (orbit_velocity + tex_orbit_velocity) * mix(1.0, rand_from_seed(alt_seed), orbit_velocity_random);\n";
+		code += "	if (orbit_amount != 0.0) {\n";
+		code += "	     float ang = orbit_amount * DELTA * pi * 2.0;\n";
+		code += "	     mat2 rot = mat2(vec2(cos(ang), -sin(ang)), vec2(sin(ang), cos(ang)));\n";
+		code += "	     TRANSFORM[3].xy -= diff.xy;\n";
+		code += "	     TRANSFORM[3].xy += rot * diff.xy;\n";
+		code += "	}\n";
 	}
 
 	if (tex_parameters[PARAM_INITIAL_LINEAR_VELOCITY].is_valid()) {
-		code += "		VELOCITY = normalize(VELOCITY) * tex_linear_velocity;\n";
-	}
-	code += "		if (damping + tex_damping > 0.0) {\n";
-	code += "			float v = length(VELOCITY);\n";
-	code += "			float damp = (damping + tex_damping) * mix(1.0, rand_from_seed(alt_seed), damping_random);\n";
-	code += "			v -= damp * DELTA;\n";
-	code += "			if (v < 0.0) {\n";
-	code += "				VELOCITY = vec3(0.0);\n";
-	code += "			} else {\n";
-	code += "				VELOCITY = normalize(VELOCITY) * v;\n";
-	code += "			}\n";
+		code += "	VELOCITY = normalize(VELOCITY) * tex_linear_velocity;\n";
+	}
+	code += "	if (damping + tex_damping > 0.0) {\n";
+	code += "		float v = length(VELOCITY);\n";
+	code += "		float damp = (damping + tex_damping) * mix(1.0, rand_from_seed(alt_seed), damping_random);\n";
+	code += "		v -= damp * DELTA;\n";
+	code += "		if (v < 0.0) {\n";
+	code += "			VELOCITY = vec3(0.0);\n";
+	code += "		} else {\n";
+	code += "			VELOCITY = normalize(VELOCITY) * v;\n";
 	code += "		}\n";
-	code += "		float base_angle = (initial_angle + tex_angle) * mix(1.0, angle_rand, initial_angle_random);\n";
-	code += "		base_angle += CUSTOM.y * LIFETIME * (angular_velocity + tex_angular_velocity) * mix(1.0, rand_from_seed(alt_seed) * 2.0 - 1.0, angular_velocity_random);\n";
-	code += "		CUSTOM.x = base_angle * degree_to_rad;\n"; // angle
-	code += "		CUSTOM.z = (anim_offset + tex_anim_offset) * mix(1.0, anim_offset_rand, anim_offset_random) + CUSTOM.y * (anim_speed + tex_anim_speed) * mix(1.0, rand_from_seed(alt_seed), anim_speed_random);\n"; // angle
 	code += "	}\n";
+	code += "	float base_angle = (initial_angle + tex_angle) * mix(1.0, angle_rand, initial_angle_random);\n";
+	code += "	base_angle += CUSTOM.y * LIFETIME * (angular_velocity + tex_angular_velocity) * mix(1.0, rand_from_seed(alt_seed) * 2.0 - 1.0, angular_velocity_random);\n";
+	code += "	CUSTOM.x = base_angle * degree_to_rad;\n"; // angle
+	code += "	CUSTOM.z = (anim_offset + tex_anim_offset) * mix(1.0, anim_offset_rand, anim_offset_random) + CUSTOM.y * (anim_speed + tex_anim_speed) * mix(1.0, rand_from_seed(alt_seed), anim_speed_random);\n"; // animation offset
+
 	// apply color
 	// apply hue rotation
 	if (tex_parameters[PARAM_SCALE].is_valid()) {
@@ -659,7 +666,7 @@ void ParticlesMaterial::_update_shader() {
 		code += "	}";
 	}
 
-	code += "	if (CUSTOM.y > CUSTOM.w) {";
+	code += "	if (CUSTOM.y > CUSTOM.w) {\n";
 	code += "		ACTIVE = false;\n";
 	code += "	}\n";
 	code += "}\n";
diff --git a/scene/resources/sky_material.cpp b/scene/resources/sky_material.cpp
index b2efecb1cb..f50ee9c4c8 100644
--- a/scene/resources/sky_material.cpp
+++ b/scene/resources/sky_material.cpp
@@ -194,7 +194,7 @@ ProceduralSkyMaterial::ProceduralSkyMaterial() {
 	code += "uniform float sun_angle_max = 1.74;\n";
 	code += "uniform float sun_curve : hint_range(0, 1) = 0.05;\n\n";
 	code += "const float PI = 3.1415926535897932384626433833;\n\n";
-	code += "void fragment() {\n";
+	code += "void sky() {\n";
 	code += "\tfloat v_angle = acos(clamp(EYEDIR.y, -1.0, 1.0));\n";
 	code += "\tfloat c = (1.0 - v_angle / (PI * 0.5));\n";
 	code += "\tvec3 sky = mix(sky_horizon_color.rgb, sky_top_color.rgb, clamp(1.0 - pow(1.0 - c, 1.0 / sky_curve), 0.0, 1.0));\n";
@@ -301,7 +301,7 @@ PanoramaSkyMaterial::PanoramaSkyMaterial() {
 	String code = "shader_type sky;\n\n";
 
 	code += "uniform sampler2D source_panorama : filter_linear;\n";
-	code += "void fragment() {\n";
+	code += "void sky() {\n";
 	code += "\tCOLOR = texture(source_panorama, SKY_COORDS).rgb;\n";
 	code += "}";
 
@@ -521,7 +521,7 @@ PhysicalSkyMaterial::PhysicalSkyMaterial() {
 	code += "\treturn fract(p.x * p.y * p.z * (p.x + p.y + p.z));\n";
 	code += "}\n\n";
 
-	code += "void fragment() {\n";
+	code += "void sky() {\n";
 	code += "\tif (LIGHT0_ENABLED) {\n";
 	code += "\t\tfloat zenith_angle = clamp( dot(UP, normalize(LIGHT0_DIRECTION)), -1.0, 1.0 );\n";
 	code += "\t\tfloat sun_energy = max(0.0, 1.0 - exp(-((PI * 0.5) - acos(zenith_angle)))) * SUN_ENERGY * LIGHT0_ENERGY;\n";
diff --git a/scene/resources/visual_shader.cpp b/scene/resources/visual_shader.cpp
index e1e24ddab2..d09cf4b971 100644
--- a/scene/resources/visual_shader.cpp
+++ b/scene/resources/visual_shader.cpp
@@ -961,7 +961,8 @@ static const char *type_string[VisualShader::TYPE_MAX] = {
 	"light",
 	"emit",
 	"process",
-	"end"
+	"end",
+	"sky",
 };
 
 bool VisualShader::_set(const StringName &p_name, const Variant &p_value) {
@@ -1476,7 +1477,7 @@ void VisualShader::_update_shader() const {
 		global_code += "render_mode " + render_mode + ";\n\n";
 	}
 
-	static const char *func_name[TYPE_MAX] = { "vertex", "fragment", "light", "emit", "process", "end" };
+	static const char *func_name[TYPE_MAX] = { "vertex", "fragment", "light", "emit", "process", "end", "sky" };
 
 	String global_expressions;
 	Set<String> used_uniform_names;
@@ -1667,6 +1668,7 @@ void VisualShader::_bind_methods() {
 	BIND_ENUM_CONSTANT(TYPE_EMIT);
 	BIND_ENUM_CONSTANT(TYPE_PROCESS);
 	BIND_ENUM_CONSTANT(TYPE_END);
+	BIND_ENUM_CONSTANT(TYPE_SKY);
 	BIND_ENUM_CONSTANT(TYPE_MAX);
 
 	BIND_CONSTANT(NODE_ID_INVALID);
@@ -1698,7 +1700,6 @@ const VisualShaderNodeInput::Port VisualShaderNodeInput::ports[] = {
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_VECTOR, "color", "COLOR.rgb" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_SCALAR, "alpha", "COLOR.a" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_SCALAR, "point_size", "POINT_SIZE" },
-
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_TRANSFORM, "world", "WORLD_MATRIX" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_TRANSFORM, "modelview", "MODELVIEW_MATRIX" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_TRANSFORM, "camera", "CAMERA_MATRIX" },
@@ -1721,10 +1722,8 @@ const VisualShaderNodeInput::Port VisualShaderNodeInput::ports[] = {
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "color", "COLOR.rgb" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "alpha", "COLOR.a" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "point_coord", "vec3(POINT_COORD, 0.0)" },
-
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "screen_uv", "vec3(SCREEN_UV, 0.0)" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "side", "float(FRONT_FACING ? 1.0 : 0.0)" },
-
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_TRANSFORM, "world", "WORLD_MATRIX" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_TRANSFORM, "inv_camera", "INV_CAMERA_MATRIX" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_TRANSFORM, "camera", "CAMERA_MATRIX" },
@@ -1750,7 +1749,6 @@ const VisualShaderNodeInput::Port VisualShaderNodeInput::ports[] = {
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_VECTOR, "specular", "SPECULAR_LIGHT" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_SCALAR, "roughness", "ROUGHNESS" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_SCALAR, "metallic", "METALLIC" },
-
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_TRANSFORM, "world", "WORLD_MATRIX" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_TRANSFORM, "inv_camera", "INV_CAMERA_MATRIX" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_TRANSFORM, "camera", "CAMERA_MATRIX" },
@@ -1759,6 +1757,7 @@ const VisualShaderNodeInput::Port VisualShaderNodeInput::ports[] = {
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_SCALAR, "time", "TIME" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_VECTOR, "viewport_size", "vec3(VIEWPORT_SIZE, 0.0)" },
 	{ Shader::MODE_SPATIAL, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_BOOLEAN, "output_is_srgb", "OUTPUT_IS_SRGB" },
+
 	// Canvas Item, Vertex
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_VECTOR, "vertex", "vec3(VERTEX, 0.0)" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_VECTOR, "uv", "vec3(UV, 0.0)" },
@@ -1766,12 +1765,12 @@ const VisualShaderNodeInput::Port VisualShaderNodeInput::ports[] = {
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_SCALAR, "alpha", "COLOR.a" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_SCALAR, "point_size", "POINT_SIZE" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_VECTOR, "texture_pixel_size", "vec3(TEXTURE_PIXEL_SIZE, 1.0)" },
-
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_TRANSFORM, "world", "WORLD_MATRIX" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_TRANSFORM, "canvas", "CANVAS_MATRIX" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_TRANSFORM, "screen", "SCREEN_MATRIX" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_SCALAR, "time", "TIME" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_VERTEX, VisualShaderNode::PORT_TYPE_BOOLEAN, "at_light_pass", "AT_LIGHT_PASS" },
+
 	// Canvas Item, Fragment
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "fragcoord", "FRAGCOORD.xyz" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "uv", "vec3(UV, 0.0)" },
@@ -1789,6 +1788,7 @@ const VisualShaderNodeInput::Port VisualShaderNodeInput::ports[] = {
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "specular_shininess", "SPECULAR_SHININESS.rgb" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "specular_shininess_alpha", "SPECULAR_SHININESS.a" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SAMPLER, "specular_shininess_texture", "SPECULAR_SHININESS_TEXTURE" },
+
 	// Canvas Item, Light
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_VECTOR, "fragcoord", "FRAGCOORD.xyz" },
 	{ Shader::MODE_CANVAS_ITEM, VisualShader::TYPE_LIGHT, VisualShaderNode::PORT_TYPE_VECTOR, "uv", "vec3(UV, 0.0)" },
@@ -1856,36 +1856,36 @@ const VisualShaderNodeInput::Port VisualShaderNodeInput::ports[] = {
 	{ Shader::MODE_PARTICLES, VisualShader::TYPE_END, VisualShaderNode::PORT_TYPE_TRANSFORM, "emission_transform", "EMISSION_TRANSFORM" },
 	{ Shader::MODE_PARTICLES, VisualShader::TYPE_END, VisualShaderNode::PORT_TYPE_SCALAR, "time", "TIME" },
 
-	// Sky, Fragment
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_BOOLEAN, "at_cubemap_pass", "AT_CUBEMAP_PASS" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_BOOLEAN, "at_half_res_pass", "AT_HALF_RES_PASS" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_BOOLEAN, "at_quarter_res_pass", "AT_QUARTER_RES_PASS" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "eyedir", "EYEDIR" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "half_res_color", "HALF_RES_COLOR.rgb" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "half_res_alpha", "HALF_RES_COLOR.a" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "light0_color", "LIGHT0_COLOR" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "light0_direction", "LIGHT0_DIRECTION" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_BOOLEAN, "light0_enabled", "LIGHT0_ENABLED" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "light0_energy", "LIGHT0_ENERGY" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "light1_color", "LIGHT1_COLOR" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "light1_direction", "LIGHT1_DIRECTION" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_BOOLEAN, "light1_enabled", "LIGHT1_ENABLED" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "light1_energy", "LIGHT1_ENERGY" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "light2_color", "LIGHT2_COLOR" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "light2_direction", "LIGHT2_DIRECTION" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_BOOLEAN, "light2_enabled", "LIGHT2_ENABLED" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "light2_energy", "LIGHT2_ENERGY" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "light3_color", "LIGHT3_COLOR" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "light3_direction", "LIGHT3_DIRECTION" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_BOOLEAN, "light3_enabled", "LIGHT3_ENABLED" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "light3_energy", "LIGHT3_ENERGY" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "position", "POSITION" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "quarter_res_color", "QUARTER_RES_COLOR.rgb" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "quarter_res_alpha", "QUARTER_RES_COLOR.a" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SAMPLER, "radiance", "RADIANCE" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "screen_uv", "vec3(SCREEN_UV, 0.0)" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "sky_coords", "vec3(SKY_COORDS, 0.0)" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "time", "TIME" },
+	// Sky, Sky
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_BOOLEAN, "at_cubemap_pass", "AT_CUBEMAP_PASS" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_BOOLEAN, "at_half_res_pass", "AT_HALF_RES_PASS" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_BOOLEAN, "at_quarter_res_pass", "AT_QUARTER_RES_PASS" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "eyedir", "EYEDIR" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "half_res_color", "HALF_RES_COLOR.rgb" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_SCALAR, "half_res_alpha", "HALF_RES_COLOR.a" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "light0_color", "LIGHT0_COLOR" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "light0_direction", "LIGHT0_DIRECTION" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_BOOLEAN, "light0_enabled", "LIGHT0_ENABLED" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_SCALAR, "light0_energy", "LIGHT0_ENERGY" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "light1_color", "LIGHT1_COLOR" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "light1_direction", "LIGHT1_DIRECTION" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_BOOLEAN, "light1_enabled", "LIGHT1_ENABLED" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_SCALAR, "light1_energy", "LIGHT1_ENERGY" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "light2_color", "LIGHT2_COLOR" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "light2_direction", "LIGHT2_DIRECTION" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_BOOLEAN, "light2_enabled", "LIGHT2_ENABLED" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_SCALAR, "light2_energy", "LIGHT2_ENERGY" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "light3_color", "LIGHT3_COLOR" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "light3_direction", "LIGHT3_DIRECTION" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_BOOLEAN, "light3_enabled", "LIGHT3_ENABLED" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_SCALAR, "light3_energy", "LIGHT3_ENERGY" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "position", "POSITION" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "quarter_res_color", "QUARTER_RES_COLOR.rgb" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_SCALAR, "quarter_res_alpha", "QUARTER_RES_COLOR.a" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_SAMPLER, "radiance", "RADIANCE" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "screen_uv", "vec3(SCREEN_UV, 0.0)" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "sky_coords", "vec3(SKY_COORDS, 0.0)" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_SCALAR, "time", "TIME" },
 
 	{ Shader::MODE_MAX, VisualShader::TYPE_MAX, VisualShaderNode::PORT_TYPE_TRANSFORM, nullptr, nullptr },
 };
@@ -2449,9 +2449,9 @@ const VisualShaderNodeOutput::Port VisualShaderNodeOutput::ports[] = {
 	{ Shader::MODE_PARTICLES, VisualShader::TYPE_END, VisualShaderNode::PORT_TYPE_SCALAR, "custom_alpha", "CUSTOM.a" },
 	{ Shader::MODE_PARTICLES, VisualShader::TYPE_END, VisualShaderNode::PORT_TYPE_TRANSFORM, "transform", "TRANSFORM" },
 	{ Shader::MODE_PARTICLES, VisualShader::TYPE_END, VisualShaderNode::PORT_TYPE_BOOLEAN, "active", "ACTIVE" },
-	// Sky, Fragment
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_VECTOR, "color", "COLOR" },
-	{ Shader::MODE_SKY, VisualShader::TYPE_FRAGMENT, VisualShaderNode::PORT_TYPE_SCALAR, "alpha", "ALPHA" },
+	// Sky, Sky
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_VECTOR, "color", "COLOR" },
+	{ Shader::MODE_SKY, VisualShader::TYPE_SKY, VisualShaderNode::PORT_TYPE_SCALAR, "alpha", "ALPHA" },
 
 	{ Shader::MODE_MAX, VisualShader::TYPE_MAX, VisualShaderNode::PORT_TYPE_TRANSFORM, nullptr, nullptr },
 };
diff --git a/scene/resources/visual_shader.h b/scene/resources/visual_shader.h
index 54a5c19049..e2e1b473ed 100644
--- a/scene/resources/visual_shader.h
+++ b/scene/resources/visual_shader.h
@@ -54,6 +54,7 @@ public:
 		TYPE_EMIT,
 		TYPE_PROCESS,
 		TYPE_END,
+		TYPE_SKY,
 		TYPE_MAX
 	};
 
diff --git a/scene/resources/world_3d.cpp b/scene/resources/world_3d.cpp
index 0e9f7a6cf2..f067771d58 100644
--- a/scene/resources/world_3d.cpp
+++ b/scene/resources/world_3d.cpp
@@ -348,7 +348,7 @@ World3D::World3D() {
 	navigation_map = NavigationServer3D::get_singleton()->map_create();
 	NavigationServer3D::get_singleton()->map_set_active(navigation_map, true);
 	NavigationServer3D::get_singleton()->map_set_cell_size(navigation_map, GLOBAL_DEF("navigation/3d/default_cell_size", 0.3));
-	NavigationServer3D::get_singleton()->map_set_edge_connection_margin(navigation_map, GLOBAL_DEF("navigation/3d/default_edge_connection_margin", 5.0)); // Five meters, depends a lot on the agent's radius
+	NavigationServer3D::get_singleton()->map_set_edge_connection_margin(navigation_map, GLOBAL_DEF("navigation/3d/default_edge_connection_margin", 0.3));
 
 #ifdef _3D_DISABLED
 	indexer = nullptr;
diff --git a/scene/scene_string_names.cpp b/scene/scene_string_names.cpp
index 892802c103..7575ccd5c3 100644
--- a/scene/scene_string_names.cpp
+++ b/scene/scene_string_names.cpp
@@ -190,10 +190,6 @@ SceneStringNames::SceneStringNames() {
 
 	_default = StaticCString::create("default");
 
-	for (int i = 0; i < MAX_MATERIALS; i++) {
-		mesh_materials[i] = "material/" + itos(i);
-	}
-
 	_window_group = StaticCString::create("_window_group");
 	_window_input = StaticCString::create("_window_input");
 	window_input = StaticCString::create("window_input");
diff --git a/scene/scene_string_names.h b/scene/scene_string_names.h
index 655e49c6f9..a5b489eddc 100644
--- a/scene/scene_string_names.h
+++ b/scene/scene_string_names.h
@@ -216,10 +216,6 @@ public:
 	StringName use_in_baked_light;
 	StringName use_dynamic_gi;
 #endif
-	enum {
-		MAX_MATERIALS = 32
-	};
-	StringName mesh_materials[MAX_MATERIALS];
 };
 
 #endif // SCENE_STRING_NAMES_H
diff --git a/servers/physics_2d/body_2d_sw.cpp b/servers/physics_2d/body_2d_sw.cpp
index d0636047b7..0a91931354 100644
--- a/servers/physics_2d/body_2d_sw.cpp
+++ b/servers/physics_2d/body_2d_sw.cpp
@@ -658,8 +658,6 @@ Body2DSW::Body2DSW() :
 	omit_force_integration = false;
 	applied_torque = 0;
 	island_step = 0;
-	island_next = nullptr;
-	island_list_next = nullptr;
 	_set_static(false);
 	first_time_kinematic = false;
 	linear_damp = -1;
diff --git a/servers/physics_2d/body_2d_sw.h b/servers/physics_2d/body_2d_sw.h
index 60d55ab8bd..7ea4ac697c 100644
--- a/servers/physics_2d/body_2d_sw.h
+++ b/servers/physics_2d/body_2d_sw.h
@@ -125,8 +125,6 @@ class Body2DSW : public CollisionObject2DSW {
 	ForceIntegrationCallback *fi_callback;
 
 	uint64_t island_step;
-	Body2DSW *island_next;
-	Body2DSW *island_list_next;
 
 	_FORCE_INLINE_ void _compute_area_gravity_and_dampenings(const Area2DSW *p_area);
 
@@ -175,12 +173,6 @@ public:
 	_FORCE_INLINE_ uint64_t get_island_step() const { return island_step; }
 	_FORCE_INLINE_ void set_island_step(uint64_t p_step) { island_step = p_step; }
 
-	_FORCE_INLINE_ Body2DSW *get_island_next() const { return island_next; }
-	_FORCE_INLINE_ void set_island_next(Body2DSW *p_next) { island_next = p_next; }
-
-	_FORCE_INLINE_ Body2DSW *get_island_list_next() const { return island_list_next; }
-	_FORCE_INLINE_ void set_island_list_next(Body2DSW *p_next) { island_list_next = p_next; }
-
 	_FORCE_INLINE_ void add_constraint(Constraint2DSW *p_constraint, int p_pos) { constraint_list.push_back({ p_constraint, p_pos }); }
 	_FORCE_INLINE_ void remove_constraint(Constraint2DSW *p_constraint, int p_pos) { constraint_list.erase({ p_constraint, p_pos }); }
 	const List<Pair<Constraint2DSW *, int>> &get_constraint_list() const { return constraint_list; }
diff --git a/servers/physics_2d/broad_phase_2d_hash_grid.cpp b/servers/physics_2d/broad_phase_2d_hash_grid.cpp
index 6cfe6908d1..35447c5389 100644
--- a/servers/physics_2d/broad_phase_2d_hash_grid.cpp
+++ b/servers/physics_2d/broad_phase_2d_hash_grid.cpp
@@ -35,6 +35,12 @@
 #define LARGE_ELEMENT_FI 1.01239812
 
 void BroadPhase2DHashGrid::_pair_attempt(Element *p_elem, Element *p_with) {
+	if (p_elem->owner == p_with->owner) { // Elements that share an owner are never paired.
+		return;
+	}
+	if (!_test_collision_mask(p_elem->collision_mask, p_elem->collision_layer, p_with->collision_mask, p_with->collision_layer)) { // Neither element's stored mask overlaps the other's stored layer.
+		return;
+	}
 	Map<Element *, PairData *>::Element *E = p_elem->paired.find(p_with);
 
 	ERR_FAIL_COND(p_elem->_static && p_with->_static);
@@ -49,6 +55,12 @@ void BroadPhase2DHashGrid::_pair_attempt(Element *p_elem, Element *p_with) {
 }
 
 void BroadPhase2DHashGrid::_unpair_attempt(Element *p_elem, Element *p_with) {
+	if (p_elem->owner == p_with->owner) { // Same-owner elements are skipped, matching the guard in _pair_attempt.
+		return;
+	}
+	if (!_test_collision_mask(p_elem->collision_mask, p_elem->collision_layer, p_with->collision_mask, p_with->collision_layer)) { // Tested against the mask/layer stored on the elements, not read from the owners.
+		return;
+	}
 	Map<Element *, PairData *>::Element *E = p_elem->paired.find(p_with);
 
 	ERR_FAIL_COND(!E); //this should really be paired..
@@ -74,24 +86,22 @@ void BroadPhase2DHashGrid::_check_motion(Element *p_elem) {
 		bool physical_collision = p_elem->aabb.intersects(E->key()->aabb);
 		bool logical_collision = p_elem->owner->test_collision_mask(E->key()->owner);
 
-		if (physical_collision) {
-			if (!E->get()->colliding || (logical_collision && !E->get()->ud && pair_callback)) {
+		if (physical_collision && logical_collision) { // Pair only while the AABBs intersect and the owners' collision masks match.
+			if (!E->get()->colliding && pair_callback) {
 				E->get()->ud = pair_callback(p_elem->owner, p_elem->subindex, E->key()->owner, E->key()->subindex, pair_userdata);
-			} else if (E->get()->colliding && !logical_collision && E->get()->ud && unpair_callback) {
-				unpair_callback(p_elem->owner, p_elem->subindex, E->key()->owner, E->key()->subindex, E->get()->ud, unpair_userdata);
-				E->get()->ud = nullptr;
 			}
 			E->get()->colliding = true;
-		} else { // No physcial_collision
+		} else { // No collision: the AABBs are apart or the collision masks no longer match.
 			if (E->get()->colliding && unpair_callback) {
 				unpair_callback(p_elem->owner, p_elem->subindex, E->key()->owner, E->key()->subindex, E->get()->ud, unpair_userdata);
+				E->get()->ud = nullptr; // Forget the pair data once it has been passed to unpair_callback.
 			}
 			E->get()->colliding = false;
 		}
 	}
 }
 
-void BroadPhase2DHashGrid::_enter_grid(Element *p_elem, const Rect2 &p_rect, bool p_static) {
+void BroadPhase2DHashGrid::_enter_grid(Element *p_elem, const Rect2 &p_rect, bool p_static, bool p_force_enter) {
 	Vector2 sz = (p_rect.size / cell_size * LARGE_ELEMENT_FI); //use magic number to avoid floating point issues
 	if (sz.width * sz.height > large_object_min_surface) {
 		//large object, do not use grid, must check against all elements
@@ -99,9 +109,6 @@ void BroadPhase2DHashGrid::_enter_grid(Element *p_elem, const Rect2 &p_rect, boo
 			if (E->key() == p_elem->self) {
 				continue; // do not pair against itself
 			}
-			if (E->get().owner == p_elem->owner) {
-				continue;
-			}
 			if (E->get()._static && p_static) {
 				continue;
 			}
@@ -133,7 +140,7 @@ void BroadPhase2DHashGrid::_enter_grid(Element *p_elem, const Rect2 &p_rect, boo
 				pb = pb->next;
 			}
 
-			bool entered = false;
+			bool entered = p_force_enter;
 
 			if (!pb) {
 				//does not exist, create!
@@ -155,17 +162,11 @@ void BroadPhase2DHashGrid::_enter_grid(Element *p_elem, const Rect2 &p_rect, boo
 
 			if (entered) {
 				for (Map<Element *, RC>::Element *E = pb->object_set.front(); E; E = E->next()) {
-					if (E->key()->owner == p_elem->owner) {
-						continue;
-					}
 					_pair_attempt(p_elem, E->key());
 				}
 
 				if (!p_static) {
 					for (Map<Element *, RC>::Element *E = pb->static_object_set.front(); E; E = E->next()) {
-						if (E->key()->owner == p_elem->owner) {
-							continue;
-						}
 						_pair_attempt(p_elem, E->key());
 					}
 				}
@@ -179,18 +180,14 @@ void BroadPhase2DHashGrid::_enter_grid(Element *p_elem, const Rect2 &p_rect, boo
 		if (E->key() == p_elem) {
 			continue; // do not pair against itself
 		}
-		if (E->key()->owner == p_elem->owner) {
-			continue;
-		}
 		if (E->key()->_static && p_static) {
 			continue;
 		}
-
 		_pair_attempt(E->key(), p_elem);
 	}
 }
 
-void BroadPhase2DHashGrid::_exit_grid(Element *p_elem, const Rect2 &p_rect, bool p_static) {
+void BroadPhase2DHashGrid::_exit_grid(Element *p_elem, const Rect2 &p_rect, bool p_static, bool p_force_exit) {
 	Vector2 sz = (p_rect.size / cell_size * LARGE_ELEMENT_FI);
 	if (sz.width * sz.height > large_object_min_surface) {
 		//unpair all elements, instead of checking all, just check what is already paired, so we at least save from checking static vs static
@@ -229,7 +226,7 @@ void BroadPhase2DHashGrid::_exit_grid(Element *p_elem, const Rect2 &p_rect, bool
 
 			ERR_CONTINUE(!pb); //should exist!!
 
-			bool exited = false;
+			bool exited = p_force_exit;
 
 			if (p_static) {
 				if (pb->static_object_set[p_elem].dec() == 0) {
@@ -245,17 +242,11 @@ void BroadPhase2DHashGrid::_exit_grid(Element *p_elem, const Rect2 &p_rect, bool
 
 			if (exited) {
 				for (Map<Element *, RC>::Element *E = pb->object_set.front(); E; E = E->next()) {
-					if (E->key()->owner == p_elem->owner) {
-						continue;
-					}
 					_unpair_attempt(p_elem, E->key());
 				}
 
 				if (!p_static) {
 					for (Map<Element *, RC>::Element *E = pb->static_object_set.front(); E; E = E->next()) {
-						if (E->key()->owner == p_elem->owner) {
-							continue;
-						}
 						_unpair_attempt(p_elem, E->key());
 					}
 				}
@@ -288,9 +279,6 @@ void BroadPhase2DHashGrid::_exit_grid(Element *p_elem, const Rect2 &p_rect, bool
 		if (E->key() == p_elem) {
 			continue; // do not pair against itself
 		}
-		if (E->key()->owner == p_elem->owner) {
-			continue;
-		}
 		if (E->key()->_static && p_static) {
 			continue;
 		}
@@ -306,6 +294,8 @@ BroadPhase2DHashGrid::ID BroadPhase2DHashGrid::create(CollisionObject2DSW *p_obj
 	Element e;
 	e.owner = p_object;
 	e._static = false;
+	e.collision_mask = p_object->get_collision_mask();
+	e.collision_layer = p_object->get_collision_layer();
 	e.subindex = p_subindex;
 	e.self = current;
 	e.pass = 0;
@@ -319,13 +309,26 @@ void BroadPhase2DHashGrid::move(ID p_id, const Rect2 &p_aabb) {
 	ERR_FAIL_COND(!E);
 
 	Element &e = E->get();
+	bool layer_changed = e.collision_mask != e.owner->get_collision_mask() || e.collision_layer != e.owner->get_collision_layer();
 
-	if (p_aabb != e.aabb) {
+	if (p_aabb != e.aabb || layer_changed) {
+		uint32_t old_mask = e.collision_mask;
+		uint32_t old_layer = e.collision_layer;
 		if (p_aabb != Rect2()) {
-			_enter_grid(&e, p_aabb, e._static);
+			e.collision_mask = e.owner->get_collision_mask();
+			e.collision_layer = e.owner->get_collision_layer();
+
+			_enter_grid(&e, p_aabb, e._static, layer_changed); // layer_changed is passed as p_force_enter.
 		}
 		if (e.aabb != Rect2()) {
-			_exit_grid(&e, e.aabb, e._static);
+			// _unpair_attempt filters on the element's stored mask/layer, so exit the old cells with the previous values.
+			e.collision_mask = old_mask;
+			e.collision_layer = old_layer;
+
+			_exit_grid(&e, e.aabb, e._static, layer_changed); // layer_changed is passed as p_force_exit.
+
+			e.collision_mask = e.owner->get_collision_mask(); // Store the owner's current values again.
+			e.collision_layer = e.owner->get_collision_layer();
 		}
 		e.aabb = p_aabb;
 	}
@@ -344,13 +347,13 @@ void BroadPhase2DHashGrid::set_static(ID p_id, bool p_static) {
 	}
 
 	if (e.aabb != Rect2()) {
-		_exit_grid(&e, e.aabb, e._static);
+		_exit_grid(&e, e.aabb, e._static, false);
 	}
 
 	e._static = p_static;
 
 	if (e.aabb != Rect2()) {
-		_enter_grid(&e, e.aabb, e._static);
+		_enter_grid(&e, e.aabb, e._static, false);
 		_check_motion(&e);
 	}
 }
@@ -362,7 +365,7 @@ void BroadPhase2DHashGrid::remove(ID p_id) {
 	Element &e = E->get();
 
 	if (e.aabb != Rect2()) {
-		_exit_grid(&e, e.aabb, e._static);
+		_exit_grid(&e, e.aabb, e._static, false);
 	}
 
 	element_map.erase(p_id);
diff --git a/servers/physics_2d/broad_phase_2d_hash_grid.h b/servers/physics_2d/broad_phase_2d_hash_grid.h
index eb7c8879ac..bb7c03b989 100644
--- a/servers/physics_2d/broad_phase_2d_hash_grid.h
+++ b/servers/physics_2d/broad_phase_2d_hash_grid.h
@@ -51,6 +51,9 @@ class BroadPhase2DHashGrid : public BroadPhase2DSW {
 		CollisionObject2DSW *owner;
 		bool _static;
 		Rect2 aabb;
+		// Owner's collision_mask/layer, used to detect changes in layers.
+		uint32_t collision_mask;
+		uint32_t collision_layer;
 		int subindex;
 		uint64_t pass;
 		Map<Element *, PairData *> paired;
@@ -115,8 +118,12 @@ class BroadPhase2DHashGrid : public BroadPhase2DSW {
 	UnpairCallback unpair_callback;
 	void *unpair_userdata;
 
-	void _enter_grid(Element *p_elem, const Rect2 &p_rect, bool p_static);
-	void _exit_grid(Element *p_elem, const Rect2 &p_rect, bool p_static);
+	static _FORCE_INLINE_ bool _test_collision_mask(uint32_t p_mask1, uint32_t p_layer1, uint32_t p_mask2, uint32_t p_layer2) {
+		return (p_mask1 & p_layer2) != 0 || (p_mask2 & p_layer1) != 0; // True when either mask shares a bit with the other side's layer.
+	}
+
+	void _enter_grid(Element *p_elem, const Rect2 &p_rect, bool p_static, bool p_force_enter);
+	void _exit_grid(Element *p_elem, const Rect2 &p_rect, bool p_static, bool p_force_exit);
 	template <bool use_aabb, bool use_segment>
 	_FORCE_INLINE_ void _cull(const Point2i p_cell, const Rect2 &p_aabb, const Point2 &p_from, const Point2 &p_to, CollisionObject2DSW **p_results, int p_max_results, int *p_result_indices, int &index);
 
diff --git a/servers/physics_2d/constraint_2d_sw.h b/servers/physics_2d/constraint_2d_sw.h
index 49ae4dd848..b724deb48e 100644
--- a/servers/physics_2d/constraint_2d_sw.h
+++ b/servers/physics_2d/constraint_2d_sw.h
@@ -37,8 +37,6 @@ class Constraint2DSW {
 	Body2DSW **_body_ptr;
 	int _body_count;
 	uint64_t island_step;
-	Constraint2DSW *island_next;
-	Constraint2DSW *island_list_next;
 	bool disabled_collisions_between_bodies;
 
 	RID self;
@@ -58,12 +56,6 @@ public:
 	_FORCE_INLINE_ uint64_t get_island_step() const { return island_step; }
 	_FORCE_INLINE_ void set_island_step(uint64_t p_step) { island_step = p_step; }
 
-	_FORCE_INLINE_ Constraint2DSW *get_island_next() const { return island_next; }
-	_FORCE_INLINE_ void set_island_next(Constraint2DSW *p_next) { island_next = p_next; }
-
-	_FORCE_INLINE_ Constraint2DSW *get_island_list_next() const { return island_list_next; }
-	_FORCE_INLINE_ void set_island_list_next(Constraint2DSW *p_next) { island_list_next = p_next; }
-
 	_FORCE_INLINE_ Body2DSW **get_body_ptr() const { return _body_ptr; }
 	_FORCE_INLINE_ int get_body_count() const { return _body_count; }
 
diff --git a/servers/physics_2d/joints_2d_sw.cpp b/servers/physics_2d/joints_2d_sw.cpp
index c7b556deba..20d4b9aa1a 100644
--- a/servers/physics_2d/joints_2d_sw.cpp
+++ b/servers/physics_2d/joints_2d_sw.cpp
@@ -97,8 +97,13 @@ normal_relative_velocity(Body2DSW *a, Body2DSW *b, Vector2 rA, Vector2 rB, Vecto
 }
 
 bool PinJoint2DSW::setup(real_t p_step) {
+	if ((A->get_mode() <= PhysicsServer2D::BODY_MODE_KINEMATIC) && (!B || B->get_mode() <= PhysicsServer2D::BODY_MODE_KINEMATIC)) { // B may be null for a pin joint (see the rB computation below), so guard it before dereferencing.
+		return false;
+	}
+
 	Space2DSW *space = A->get_space();
 	ERR_FAIL_COND_V(!space, false);
+
 	rA = A->get_transform().basis_xform(anchor_A);
 	rB = B ? B->get_transform().basis_xform(anchor_B) : anchor_B;
 
@@ -257,6 +262,10 @@ mult_k(const Vector2 &vr, const Vector2 &k1, const Vector2 &k2) {
 }
 
 bool GrooveJoint2DSW::setup(real_t p_step) {
+	if ((A->get_mode() <= PhysicsServer2D::BODY_MODE_KINEMATIC) && (B->get_mode() <= PhysicsServer2D::BODY_MODE_KINEMATIC)) {
+		return false;
+	}
+
 	// calculate endpoints in worldspace
 	Vector2 ta = A->get_transform().xform(A_groove_1);
 	Vector2 tb = A->get_transform().xform(A_groove_2);
@@ -342,6 +351,10 @@ GrooveJoint2DSW::GrooveJoint2DSW(const Vector2 &p_a_groove1, const Vector2 &p_a_
 //////////////////////////////////////////////
 
 bool DampedSpringJoint2DSW::setup(real_t p_step) {
+	if ((A->get_mode() <= PhysicsServer2D::BODY_MODE_KINEMATIC) && (B->get_mode() <= PhysicsServer2D::BODY_MODE_KINEMATIC)) {
+		return false;
+	}
+
 	rA = A->get_transform().basis_xform(anchor_A);
 	rB = B->get_transform().basis_xform(anchor_B);
 
diff --git a/servers/physics_2d/step_2d_sw.cpp b/servers/physics_2d/step_2d_sw.cpp
index 6613d19729..406d750776 100644
--- a/servers/physics_2d/step_2d_sw.cpp
+++ b/servers/physics_2d/step_2d_sw.cpp
@@ -31,19 +31,23 @@
 #include "step_2d_sw.h"
 #include "core/os/os.h"
 
-void Step2DSW::_populate_island(Body2DSW *p_body, Body2DSW **p_island, Constraint2DSW **p_constraint_island) {
+#define BODY_ISLAND_COUNT_RESERVE 128
+#define BODY_ISLAND_SIZE_RESERVE 512
+#define ISLAND_COUNT_RESERVE 128
+#define ISLAND_SIZE_RESERVE 512
+
+void Step2DSW::_populate_island(Body2DSW *p_body, LocalVector<Body2DSW *> &p_body_island, LocalVector<Constraint2DSW *> &p_constraint_island) {
 	p_body->set_island_step(_step);
-	p_body->set_island_next(*p_island);
-	*p_island = p_body;
+	p_body_island.push_back(p_body);
 
-	for (const List<Pair<Constraint2DSW *, int>>::Element *E = p_body->get_constraint_list().front(); E; E = E->next()) {
+	// Faster with reversed iterations.
+	for (const List<Pair<Constraint2DSW *, int>>::Element *E = p_body->get_constraint_list().back(); E; E = E->prev()) {
 		Constraint2DSW *c = (Constraint2DSW *)E->get().first;
 		if (c->get_island_step() == _step) {
 			continue; //already processed
 		}
 		c->set_island_step(_step);
-		c->set_island_next(*p_constraint_island);
-		*p_constraint_island = c;
+		p_constraint_island.push_back(c);
 
 		for (int i = 0; i < c->get_body_count(); i++) {
 			if (i == E->get().second) {
@@ -53,78 +57,62 @@ void Step2DSW::_populate_island(Body2DSW *p_body, Body2DSW **p_island, Constrain
 			if (b->get_island_step() == _step || b->get_mode() == PhysicsServer2D::BODY_MODE_STATIC || b->get_mode() == PhysicsServer2D::BODY_MODE_KINEMATIC) {
 				continue; //no go
 			}
-			_populate_island(c->get_body_ptr()[i], p_island, p_constraint_island);
+			_populate_island(c->get_body_ptr()[i], p_body_island, p_constraint_island);
 		}
 	}
 }
 
-bool Step2DSW::_setup_island(Constraint2DSW *p_island, real_t p_delta) {
-	Constraint2DSW *ci = p_island;
-	Constraint2DSW *prev_ci = nullptr;
-	bool removed_root = false;
-	while (ci) {
-		bool process = ci->setup(p_delta);
-
-		if (!process) {
-			//remove from island if process fails
-			if (prev_ci) {
-				prev_ci->set_island_next(ci->get_island_next());
-			} else {
-				removed_root = true;
-				prev_ci = ci;
-			}
-		} else {
-			prev_ci = ci;
+void Step2DSW::_setup_island(LocalVector<Constraint2DSW *> &p_constraint_island, real_t p_delta) {
+	const uint32_t total = p_constraint_island.size();
+	uint32_t kept = 0;
+	for (uint32_t i = 0; i < total; ++i) {
+		Constraint2DSW *candidate = p_constraint_island[i];
+		if (candidate->setup(p_delta)) {
+			// Compact in place: only constraints whose setup() succeeded stay in the island.
+			p_constraint_island[kept++] = candidate;
 		}
-		ci = ci->get_island_next();
 	}
-
-	return removed_root;
+	p_constraint_island.resize(kept);
 }
 
-void Step2DSW::_solve_island(Constraint2DSW *p_island, int p_iterations, real_t p_delta) {
+void Step2DSW::_solve_island(LocalVector<Constraint2DSW *> &p_constraint_island, int p_iterations, real_t p_delta) {
 	for (int i = 0; i < p_iterations; i++) {
-		Constraint2DSW *ci = p_island;
-		while (ci) {
-			ci->solve(p_delta);
-			ci = ci->get_island_next();
+		// One solver pass: call solve() once on every constraint, in island order.
+		for (uint32_t c = 0; c < p_constraint_island.size(); ++c) {
+			p_constraint_island[c]->solve(p_delta);
 		}
 	}
 }
 
-void Step2DSW::_check_suspend(Body2DSW *p_island, real_t p_delta) {
+void Step2DSW::_check_suspend(const LocalVector<Body2DSW *> &p_body_island, real_t p_delta) {
 	bool can_sleep = true;
 
-	Body2DSW *b = p_island;
-	while (b) {
-		if (b->get_mode() == PhysicsServer2D::BODY_MODE_STATIC || b->get_mode() == PhysicsServer2D::BODY_MODE_KINEMATIC) {
-			b = b->get_island_next();
-			continue; //ignore for static
+	uint32_t body_count = p_body_island.size();
+	for (uint32_t body_index = 0; body_index < body_count; ++body_index) {
+		Body2DSW *body = p_body_island[body_index];
+
+		if (body->get_mode() == PhysicsServer2D::BODY_MODE_STATIC || body->get_mode() == PhysicsServer2D::BODY_MODE_KINEMATIC) {
+			continue; // Static and kinematic bodies are not sleep-tested.
 		}
 
-		if (!b->sleep_test(p_delta)) {
+		if (!body->sleep_test(p_delta)) { // One body failing sleep_test() keeps the whole island awake.
 			can_sleep = false;
 		}
-
-		b = b->get_island_next();
 	}
 
-	//put all to sleep or wake up everyoen
+	// Apply the island-wide decision: put every body to sleep, or wake them all up.
+	for (uint32_t body_index = 0; body_index < body_count; ++body_index) {
+		Body2DSW *body = p_body_island[body_index];
 
-	b = p_island;
-	while (b) {
-		if (b->get_mode() == PhysicsServer2D::BODY_MODE_STATIC || b->get_mode() == PhysicsServer2D::BODY_MODE_KINEMATIC) {
-			b = b->get_island_next();
-			continue; //ignore for static
+		if (body->get_mode() == PhysicsServer2D::BODY_MODE_STATIC || body->get_mode() == PhysicsServer2D::BODY_MODE_KINEMATIC) {
+			continue; // Static and kinematic bodies keep their current active state.
 		}
 
-		bool active = b->is_active();
+		bool active = body->is_active();
 
 		if (active == can_sleep) {
-			b->set_active(!can_sleep);
+			body->set_active(!can_sleep); // Only bodies whose state disagrees with the decision are toggled.
 		}
-
-		b = b->get_island_next();
 	}
 }
 
@@ -159,33 +147,43 @@ void Step2DSW::step(Space2DSW *p_space, real_t p_delta, int p_iterations) {
 
 	/* GENERATE CONSTRAINT ISLANDS */
 
-	Body2DSW *island_list = nullptr;
-	Constraint2DSW *constraint_island_list = nullptr;
 	b = body_list->first();
 
-	int island_count = 0;
+	uint32_t body_island_count = 0;
+	uint32_t island_count = 0;
 
 	while (b) {
 		Body2DSW *body = b->self();
 
 		if (body->get_island_step() != _step) {
-			Body2DSW *island = nullptr;
-			Constraint2DSW *constraint_island = nullptr;
-			_populate_island(body, &island, &constraint_island);
+			++body_island_count; // Claim the next body-island slot; body_islands is a member, so slots persist between steps.
+			if (body_islands.size() < body_island_count) {
+				body_islands.resize(body_island_count);
+			}
+			LocalVector<Body2DSW *> &body_island = body_islands[body_island_count - 1];
+			body_island.clear();
+			body_island.reserve(BODY_ISLAND_SIZE_RESERVE);
 
-			island->set_island_list_next(island_list);
-			island_list = island;
+			++island_count; // Tentatively claim a constraint-island slot too; it is released below if it stays empty.
+			if (constraint_islands.size() < island_count) {
+				constraint_islands.resize(island_count);
+			}
+			LocalVector<Constraint2DSW *> &constraint_island = constraint_islands[island_count - 1];
+			constraint_island.clear();
+			constraint_island.reserve(ISLAND_SIZE_RESERVE);
 
-			if (constraint_island) {
-				constraint_island->set_island_list_next(constraint_island_list);
-				constraint_island_list = constraint_island;
-				island_count++;
+			_populate_island(body, body_island, constraint_island);
+
+			// body_island refers to an element of body_islands, so the island is already stored.
+			// Appending a copy here would make body_islands grow on every step.
+			if (constraint_island.is_empty()) {
+				--island_count;
 			}
 		}
 		b = b->next();
 	}
 
-	p_space->set_island_count(island_count);
+	p_space->set_island_count((int)island_count);
 
 	const SelfList<Area2DSW>::List &aml = p_space->get_moved_area_list();
 
@@ -196,9 +194,13 @@ void Step2DSW::step(Space2DSW *p_space, real_t p_delta, int p_iterations) {
 				continue;
 			}
 			c->set_island_step(_step);
-			c->set_island_next(nullptr);
-			c->set_island_list_next(constraint_island_list);
-			constraint_island_list = c;
+			++island_count;
+			if (constraint_islands.size() < island_count) {
+				constraint_islands.resize(island_count);
+			}
+			LocalVector<Constraint2DSW *> &constraint_island = constraint_islands[island_count - 1];
+			constraint_island.clear();
+			constraint_island.push_back(c);
 		}
 		p_space->area_remove_from_moved_list((SelfList<Area2DSW> *)aml.first()); //faster to remove here
 	}
@@ -211,39 +213,8 @@ void Step2DSW::step(Space2DSW *p_space, real_t p_delta, int p_iterations) {
 
 	/* SETUP CONSTRAINT ISLANDS */
 
-	{
-		Constraint2DSW *ci = constraint_island_list;
-		Constraint2DSW *prev_ci = nullptr;
-		while (ci) {
-			if (_setup_island(ci, p_delta)) {
-				//removed the root from the island graph because it is not to be processed
-
-				Constraint2DSW *next = ci->get_island_next();
-
-				if (next) {
-					//root from list being deleted no longer exists, replace by next
-					next->set_island_list_next(ci->get_island_list_next());
-					if (prev_ci) {
-						prev_ci->set_island_list_next(next);
-					} else {
-						constraint_island_list = next;
-					}
-					prev_ci = next;
-				} else {
-					//list is empty, just skip
-					if (prev_ci) {
-						prev_ci->set_island_list_next(ci->get_island_list_next());
-
-					} else {
-						constraint_island_list = ci->get_island_list_next();
-					}
-				}
-			} else {
-				prev_ci = ci;
-			}
-
-			ci = ci->get_island_list_next();
-		}
+	for (uint32_t island_index = 0; island_index < island_count; ++island_index) {
+		_setup_island(constraint_islands[island_index], p_delta);
 	}
 
 	{ //profile
@@ -254,13 +225,8 @@ void Step2DSW::step(Space2DSW *p_space, real_t p_delta, int p_iterations) {
 
 	/* SOLVE CONSTRAINT ISLANDS */
 
-	{
-		Constraint2DSW *ci = constraint_island_list;
-		while (ci) {
-			//iterating each island separatedly improves cache efficiency
-			_solve_island(ci, p_iterations, p_delta);
-			ci = ci->get_island_list_next();
-		}
+	for (uint32_t island_index = 0; island_index < island_count; ++island_index) {
+		_solve_island(constraint_islands[island_index], p_iterations, p_delta);
 	}
 
 	{ //profile
@@ -280,12 +246,8 @@ void Step2DSW::step(Space2DSW *p_space, real_t p_delta, int p_iterations) {
 
 	/* SLEEP / WAKE UP ISLANDS */
 
-	{
-		Body2DSW *bi = island_list;
-		while (bi) {
-			_check_suspend(bi, p_delta);
-			bi = bi->get_island_list_next();
-		}
+	for (uint32_t island_index = 0; island_index < body_island_count; ++island_index) {
+		_check_suspend(body_islands[island_index], p_delta);
 	}
 
 	{ //profile
@@ -301,4 +263,7 @@ void Step2DSW::step(Space2DSW *p_space, real_t p_delta, int p_iterations) {
 
 Step2DSW::Step2DSW() {
 	_step = 1;
+
+	body_islands.reserve(BODY_ISLAND_COUNT_RESERVE); // Reserve capacity for the outer list of body islands up front.
+	constraint_islands.reserve(ISLAND_COUNT_RESERVE); // Same for the outer list of constraint islands.
 }
diff --git a/servers/physics_2d/step_2d_sw.h b/servers/physics_2d/step_2d_sw.h
index 83b9130608..5af4a36f52 100644
--- a/servers/physics_2d/step_2d_sw.h
+++ b/servers/physics_2d/step_2d_sw.h
@@ -33,13 +33,18 @@
 
 #include "space_2d_sw.h"
 
+#include "core/templates/local_vector.h"
+
 class Step2DSW {
 	uint64_t _step;
 
-	void _populate_island(Body2DSW *p_body, Body2DSW **p_island, Constraint2DSW **p_constraint_island);
-	bool _setup_island(Constraint2DSW *p_island, real_t p_delta);
-	void _solve_island(Constraint2DSW *p_island, int p_iterations, real_t p_delta);
-	void _check_suspend(Body2DSW *p_island, real_t p_delta);
+	LocalVector<LocalVector<Body2DSW *>> body_islands;
+	LocalVector<LocalVector<Constraint2DSW *>> constraint_islands;
+
+	void _populate_island(Body2DSW *p_body, LocalVector<Body2DSW *> &p_body_island, LocalVector<Constraint2DSW *> &p_constraint_island);
+	void _setup_island(LocalVector<Constraint2DSW *> &p_constraint_island, real_t p_delta);
+	void _solve_island(LocalVector<Constraint2DSW *> &p_constraint_island, int p_iterations, real_t p_delta);
+	void _check_suspend(const LocalVector<Body2DSW *> &p_body_island, real_t p_delta);
 
 public:
 	void step(Space2DSW *p_space, real_t p_delta, int p_iterations);
diff --git a/servers/physics_3d/body_3d_sw.cpp b/servers/physics_3d/body_3d_sw.cpp
index 64ba0cb09d..cc414b7f30 100644
--- a/servers/physics_3d/body_3d_sw.cpp
+++ b/servers/physics_3d/body_3d_sw.cpp
@@ -761,8 +761,6 @@ Body3DSW::Body3DSW() :
 	omit_force_integration = false;
 	//applied_torque=0;
 	island_step = 0;
-	island_next = nullptr;
-	island_list_next = nullptr;
 	first_time_kinematic = false;
 	first_integration = false;
 	_set_static(false);
diff --git a/servers/physics_3d/body_3d_sw.h b/servers/physics_3d/body_3d_sw.h
index e87ff2364b..5790f43019 100644
--- a/servers/physics_3d/body_3d_sw.h
+++ b/servers/physics_3d/body_3d_sw.h
@@ -135,8 +135,6 @@ class Body3DSW : public CollisionObject3DSW {
 	ForceIntegrationCallback *fi_callback;
 
 	uint64_t island_step;
-	Body3DSW *island_next;
-	Body3DSW *island_list_next;
 
 	_FORCE_INLINE_ void _compute_area_gravity_and_dampenings(const Area3DSW *p_area);
 
@@ -189,12 +187,6 @@ public:
 	_FORCE_INLINE_ uint64_t get_island_step() const { return island_step; }
 	_FORCE_INLINE_ void set_island_step(uint64_t p_step) { island_step = p_step; }
 
-	_FORCE_INLINE_ Body3DSW *get_island_next() const { return island_next; }
-	_FORCE_INLINE_ void set_island_next(Body3DSW *p_next) { island_next = p_next; }
-
-	_FORCE_INLINE_ Body3DSW *get_island_list_next() const { return island_list_next; }
-	_FORCE_INLINE_ void set_island_list_next(Body3DSW *p_next) { island_list_next = p_next; }
-
 	_FORCE_INLINE_ void add_constraint(Constraint3DSW *p_constraint, int p_pos) { constraint_map[p_constraint] = p_pos; }
 	_FORCE_INLINE_ void remove_constraint(Constraint3DSW *p_constraint) { constraint_map.erase(p_constraint); }
 	const Map<Constraint3DSW *, int> &get_constraint_map() const { return constraint_map; }
diff --git a/servers/physics_3d/body_pair_3d_sw.cpp b/servers/physics_3d/body_pair_3d_sw.cpp
index 36114c0c91..28c854466f 100644
--- a/servers/physics_3d/body_pair_3d_sw.cpp
+++ b/servers/physics_3d/body_pair_3d_sw.cpp
@@ -281,6 +281,8 @@ bool BodyPair3DSW::setup(real_t p_step) {
 
 	real_t inv_dt = 1.0 / p_step;
 
+	bool do_process = false;
+
 	for (int i = 0; i < contact_count; i++) {
 		Contact &c = contacts[i];
 		c.active = false;
@@ -323,6 +325,7 @@ bool BodyPair3DSW::setup(real_t p_step) {
 		}
 
 		c.active = true;
+		do_process = true;
 
 		// Precompute normal mass, tangent mass, and bias.
 		Vector3 inertia_A = A->get_inv_inertia_tensor().xform(c.rA.cross(c.normal));
@@ -350,7 +353,7 @@ bool BodyPair3DSW::setup(real_t p_step) {
 		}
 	}
 
-	return true;
+	return do_process;
 }
 
 void BodyPair3DSW::solve(real_t p_step) {
@@ -594,6 +597,8 @@ bool BodySoftBodyPair3DSW::setup(real_t p_step) {
 
 	real_t inv_dt = 1.0 / p_step;
 
+	bool do_process = false;
+
 	uint32_t contact_count = contacts.size();
 	for (uint32_t contact_index = 0; contact_index < contact_count; ++contact_index) {
 		Contact &c = contacts[contact_index];
@@ -614,6 +619,7 @@ bool BodySoftBodyPair3DSW::setup(real_t p_step) {
 		}
 
 		c.active = true;
+		do_process = true;
 
 #ifdef DEBUG_ENABLED
 
@@ -645,7 +651,7 @@ bool BodySoftBodyPair3DSW::setup(real_t p_step) {
 		c.depth = depth;
 
 		Vector3 j_vec = c.normal * c.acc_normal_impulse + c.acc_tangent_impulse;
-		body->apply_impulse(c.rA + body->get_center_of_mass(), -j_vec);
+		body->apply_impulse(-j_vec, c.rA + body->get_center_of_mass());
 		soft_body->apply_node_impulse(c.index_B, j_vec);
 		c.acc_bias_impulse = 0;
 		c.acc_bias_impulse_center_of_mass = 0;
@@ -661,7 +667,7 @@ bool BodySoftBodyPair3DSW::setup(real_t p_step) {
 		}
 	}
 
-	return true;
+	return do_process;
 }
 
 void BodySoftBodyPair3DSW::solve(real_t p_step) {
@@ -691,7 +697,7 @@ void BodySoftBodyPair3DSW::solve(real_t p_step) {
 
 			Vector3 jb = c.normal * (c.acc_bias_impulse - jbnOld);
 
-			body->apply_bias_impulse(c.rA + body->get_center_of_mass(), -jb, MAX_BIAS_ROTATION / p_step);
+			body->apply_bias_impulse(-jb, c.rA + body->get_center_of_mass(), MAX_BIAS_ROTATION / p_step);
 			soft_body->apply_node_bias_impulse(c.index_B, jb);
 
 			crbA = body->get_biased_angular_velocity().cross(c.rA);
@@ -706,8 +712,8 @@ void BodySoftBodyPair3DSW::solve(real_t p_step) {
 
 				Vector3 jb_com = c.normal * (c.acc_bias_impulse_center_of_mass - jbnOld_com);
 
-				body->apply_bias_impulse(body->get_center_of_mass(), -jb_com, 0.0f);
-				soft_body->apply_node_bias_impulse(c.index_B, -jb_com);
+				body->apply_bias_impulse(-jb_com, body->get_center_of_mass(), 0.0f);
+				soft_body->apply_node_bias_impulse(c.index_B, jb_com);
 			}
 
 			c.active = true;
@@ -726,7 +732,7 @@ void BodySoftBodyPair3DSW::solve(real_t p_step) {
 
 			Vector3 j = c.normal * (c.acc_normal_impulse - jnOld);
 
-			body->apply_impulse(c.rA + body->get_center_of_mass(), -j);
+			body->apply_impulse(-j, c.rA + body->get_center_of_mass());
 			soft_body->apply_node_impulse(c.index_B, j);
 
 			c.active = true;
@@ -767,7 +773,7 @@ void BodySoftBodyPair3DSW::solve(real_t p_step) {
 
 			jt = c.acc_tangent_impulse - jtOld;
 
-			body->apply_impulse(c.rA + body->get_center_of_mass(), -jt);
+			body->apply_impulse(-jt, c.rA + body->get_center_of_mass());
 			soft_body->apply_node_impulse(c.index_B, jt);
 
 			c.active = true;
diff --git a/servers/physics_3d/constraint_3d_sw.h b/servers/physics_3d/constraint_3d_sw.h
index 2571335c43..16a31e167d 100644
--- a/servers/physics_3d/constraint_3d_sw.h
+++ b/servers/physics_3d/constraint_3d_sw.h
@@ -37,8 +37,6 @@ class Constraint3DSW {
 	Body3DSW **_body_ptr;
 	int _body_count;
 	uint64_t island_step;
-	Constraint3DSW *island_next;
-	Constraint3DSW *island_list_next;
 	int priority;
 	bool disabled_collisions_between_bodies;
 
@@ -60,12 +58,6 @@ public:
 	_FORCE_INLINE_ uint64_t get_island_step() const { return island_step; }
 	_FORCE_INLINE_ void set_island_step(uint64_t p_step) { island_step = p_step; }
 
-	_FORCE_INLINE_ Constraint3DSW *get_island_next() const { return island_next; }
-	_FORCE_INLINE_ void set_island_next(Constraint3DSW *p_next) { island_next = p_next; }
-
-	_FORCE_INLINE_ Constraint3DSW *get_island_list_next() const { return island_list_next; }
-	_FORCE_INLINE_ void set_island_list_next(Constraint3DSW *p_next) { island_list_next = p_next; }
-
 	_FORCE_INLINE_ Body3DSW **get_body_ptr() const { return _body_ptr; }
 	_FORCE_INLINE_ int get_body_count() const { return _body_count; }
 
diff --git a/servers/physics_3d/joints/cone_twist_joint_3d_sw.cpp b/servers/physics_3d/joints/cone_twist_joint_3d_sw.cpp
index 9c4493f4a2..167f797bfe 100644
--- a/servers/physics_3d/joints/cone_twist_joint_3d_sw.cpp
+++ b/servers/physics_3d/joints/cone_twist_joint_3d_sw.cpp
@@ -109,6 +109,10 @@ ConeTwistJoint3DSW::ConeTwistJoint3DSW(Body3DSW *rbA, Body3DSW *rbB, const Trans
 }
 
 bool ConeTwistJoint3DSW::setup(real_t p_timestep) {
+	if ((A->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC) && (B->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC)) {
+		return false;
+	}
+
 	m_appliedImpulse = real_t(0.);
 
 	//set bias, sign, clear accumulator
diff --git a/servers/physics_3d/joints/generic_6dof_joint_3d_sw.cpp b/servers/physics_3d/joints/generic_6dof_joint_3d_sw.cpp
index 13b389251f..a86e8b4e76 100644
--- a/servers/physics_3d/joints/generic_6dof_joint_3d_sw.cpp
+++ b/servers/physics_3d/joints/generic_6dof_joint_3d_sw.cpp
@@ -303,6 +303,10 @@ bool Generic6DOFJoint3DSW::testAngularLimitMotor(int axis_index) {
 }
 
 bool Generic6DOFJoint3DSW::setup(real_t p_timestep) {
+	if ((A->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC) && (B->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC)) {
+		return false;
+	}
+
 	// Clear accumulated impulses for the next simulation step
 	m_linearLimits.m_accumulatedImpulse = Vector3(real_t(0.), real_t(0.), real_t(0.));
 	int i;
diff --git a/servers/physics_3d/joints/hinge_joint_3d_sw.cpp b/servers/physics_3d/joints/hinge_joint_3d_sw.cpp
index 2b9f0038b4..90b82f4680 100644
--- a/servers/physics_3d/joints/hinge_joint_3d_sw.cpp
+++ b/servers/physics_3d/joints/hinge_joint_3d_sw.cpp
@@ -155,6 +155,10 @@ HingeJoint3DSW::HingeJoint3DSW(Body3DSW *rbA, Body3DSW *rbB, const Vector3 &pivo
 }
 
 bool HingeJoint3DSW::setup(real_t p_step) {
+	if ((A->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC) && (B->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC)) {
+		return false;
+	}
+
 	m_appliedImpulse = real_t(0.);
 
 	if (!m_angularOnly) {
diff --git a/servers/physics_3d/joints/pin_joint_3d_sw.cpp b/servers/physics_3d/joints/pin_joint_3d_sw.cpp
index 9f708ce151..75d87992d1 100644
--- a/servers/physics_3d/joints/pin_joint_3d_sw.cpp
+++ b/servers/physics_3d/joints/pin_joint_3d_sw.cpp
@@ -50,6 +50,10 @@ subject to the following restrictions:
 #include "pin_joint_3d_sw.h"
 
 bool PinJoint3DSW::setup(real_t p_step) {
+	if ((A->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC) && (B->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC)) {
+		return false;
+	}
+
 	m_appliedImpulse = real_t(0.);
 
 	Vector3 normal(0, 0, 0);
diff --git a/servers/physics_3d/joints/slider_joint_3d_sw.cpp b/servers/physics_3d/joints/slider_joint_3d_sw.cpp
index 0adc471797..2e1ee8e770 100644
--- a/servers/physics_3d/joints/slider_joint_3d_sw.cpp
+++ b/servers/physics_3d/joints/slider_joint_3d_sw.cpp
@@ -127,6 +127,10 @@ SliderJoint3DSW::SliderJoint3DSW(Body3DSW *rbA, Body3DSW *rbB, const Transform &
 //-----------------------------------------------------------------------------
 
 bool SliderJoint3DSW::setup(real_t p_step) {
+	if ((A->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC) && (B->get_mode() <= PhysicsServer3D::BODY_MODE_KINEMATIC)) {
+		return false;
+	}
+
 	//calculate transforms
 	m_calculatedTransformA = A->get_transform() * m_frameInA;
 	m_calculatedTransformB = B->get_transform() * m_frameInB;
diff --git a/servers/physics_3d/shape_3d_sw.cpp b/servers/physics_3d/shape_3d_sw.cpp
index 4c14cb3162..ccd37ca742 100644
--- a/servers/physics_3d/shape_3d_sw.cpp
+++ b/servers/physics_3d/shape_3d_sw.cpp
@@ -30,10 +30,28 @@
 
 #include "shape_3d_sw.h"
 
+#include "core/io/image.h"
 #include "core/math/geometry_3d.h"
 #include "core/math/quick_hull.h"
 #include "core/templates/sort_array.h"
 
+// HeightMapShape3DSW is based on Bullet btHeightfieldTerrainShape.
+
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
 #define _EDGE_IS_VALID_SUPPORT_THRESHOLD 0.0002
 #define _FACE_IS_VALID_SUPPORT_THRESHOLD 0.9998
 
@@ -1617,7 +1635,7 @@ ConcavePolygonShape3DSW::ConcavePolygonShape3DSW() {
 
 /* HEIGHT MAP SHAPE */
 
-Vector<real_t> HeightMapShape3DSW::get_heights() const {
+Vector<float> HeightMapShape3DSW::get_heights() const {
 	return heights;
 }
 
@@ -1629,10 +1647,6 @@ int HeightMapShape3DSW::get_depth() const {
 	return depth;
 }
 
-real_t HeightMapShape3DSW::get_cell_size() const {
-	return cell_size;
-}
-
 void HeightMapShape3DSW::project_range(const Vector3 &p_normal, const Transform &p_transform, real_t &r_min, real_t &r_max) const {
 	//not very useful, but not very used either
 	p_transform.xform(get_aabb()).project_range_in_plane(Plane(p_normal, 0), r_min, r_max);
@@ -1643,7 +1657,198 @@ Vector3 HeightMapShape3DSW::get_support(const Vector3 &p_normal) const {
 	return get_aabb().get_support(p_normal);
 }
 
+struct _HeightmapSegmentCullParams {
+	Vector3 from;
+	Vector3 to;
+	Vector3 dir;
+
+	Vector3 result;
+	Vector3 normal;
+
+	const HeightMapShape3DSW *heightmap = nullptr;
+	FaceShape3DSW *face = nullptr;
+};
+
+_FORCE_INLINE_ bool _heightmap_face_cull_segment(_HeightmapSegmentCullParams &p_params) {
+	Vector3 res;
+	Vector3 normal;
+	if (p_params.face->intersect_segment(p_params.from, p_params.to, res, normal)) {
+		p_params.result = res;
+		p_params.normal = normal;
+		return true;
+	}
+
+	return false;
+}
+
+_FORCE_INLINE_ bool _heightmap_cell_cull_segment(_HeightmapSegmentCullParams &p_params, int p_x, int p_z) {
+	// First triangle.
+	p_params.heightmap->_get_point(p_x, p_z, p_params.face->vertex[0]);
+	p_params.heightmap->_get_point(p_x + 1, p_z, p_params.face->vertex[1]);
+	p_params.heightmap->_get_point(p_x, p_z + 1, p_params.face->vertex[2]);
+	p_params.face->normal = Plane(p_params.face->vertex[0], p_params.face->vertex[1], p_params.face->vertex[2]).normal;
+	if (_heightmap_face_cull_segment(p_params)) {
+		return true;
+	}
+
+	// Second triangle.
+	p_params.face->vertex[0] = p_params.face->vertex[1];
+	p_params.heightmap->_get_point(p_x + 1, p_z + 1, p_params.face->vertex[1]);
+	p_params.face->normal = Plane(p_params.face->vertex[0], p_params.face->vertex[1], p_params.face->vertex[2]).normal;
+	if (_heightmap_face_cull_segment(p_params)) {
+		return true;
+	}
+
+	return false;
+}
+
 bool HeightMapShape3DSW::intersect_segment(const Vector3 &p_begin, const Vector3 &p_end, Vector3 &r_point, Vector3 &r_normal) const {
+	if (heights.is_empty()) {
+		return false;
+	}
+
+	Vector3 local_begin = p_begin + local_origin;
+	Vector3 local_end = p_end + local_origin;
+
+	FaceShape3DSW face;
+	face.backface_collision = false;
+
+	_HeightmapSegmentCullParams params;
+	params.from = p_begin;
+	params.to = p_end;
+	params.dir = (p_end - p_begin).normalized();
+	params.heightmap = this;
+	params.face = &face;
+
+	// Quantize the ray begin/end.
+	int begin_x = floor(local_begin.x);
+	int begin_z = floor(local_begin.z);
+	int end_x = floor(local_end.x);
+	int end_z = floor(local_end.z);
+
+	if ((begin_x == end_x) && (begin_z == end_z)) {
+		// Simple case for rays that don't traverse the grid horizontally.
+		// Just perform a test on the given cell.
+		int x = CLAMP(begin_x, 0, width - 2);
+		int z = CLAMP(begin_z, 0, depth - 2);
+		if (_heightmap_cell_cull_segment(params, x, z)) {
+			r_point = params.result;
+			r_normal = params.normal;
+			return true;
+		}
+	} else {
+		// Perform grid query from projected ray.
+		Vector2 ray_dir_proj(local_end.x - local_begin.x, local_end.z - local_begin.z);
+		real_t ray_dist_proj = ray_dir_proj.length();
+
+		if (ray_dist_proj < CMP_EPSILON) {
+			ray_dir_proj = Vector2();
+		} else {
+			ray_dir_proj /= ray_dist_proj;
+		}
+
+		const int x_step = (ray_dir_proj.x > CMP_EPSILON) ? 1 : ((ray_dir_proj.x < -CMP_EPSILON) ? -1 : 0);
+		const int z_step = (ray_dir_proj.y > CMP_EPSILON) ? 1 : ((ray_dir_proj.y < -CMP_EPSILON) ? -1 : 0);
+
+		const real_t infinite = 1e20;
+		const real_t delta_x = (x_step != 0) ? 1.f / Math::abs(ray_dir_proj.x) : infinite;
+		const real_t delta_z = (z_step != 0) ? 1.f / Math::abs(ray_dir_proj.y) : infinite;
+
+		real_t cross_x; // At which value of `param` we will cross a x-axis lane?
+		real_t cross_z; // At which value of `param` we will cross a z-axis lane?
+
+		// X initialization.
+		if (x_step != 0) {
+			if (x_step == 1) {
+				cross_x = (ceil(local_begin.x) - local_begin.x) * delta_x;
+			} else {
+				cross_x = (local_begin.x - floor(local_begin.x)) * delta_x;
+			}
+		} else {
+			cross_x = infinite; // Will never cross on X.
+		}
+
+		// Z initialization.
+		if (z_step != 0) {
+			if (z_step == 1) {
+				cross_z = (ceil(local_begin.z) - local_begin.z) * delta_z;
+			} else {
+				cross_z = (local_begin.z - floor(local_begin.z)) * delta_z;
+			}
+		} else {
+			cross_z = infinite; // Will never cross on Z.
+		}
+
+		int x = floor(local_begin.x);
+		int z = floor(local_begin.z);
+
+		// Workaround cases where the ray starts at an integer position.
+		if (Math::abs(cross_x) < CMP_EPSILON) {
+			cross_x += delta_x;
+			// If going backwards, we should ignore the position we would get by the above flooring,
+			// because the ray is not heading in that direction.
+			if (x_step == -1) {
+				x -= 1;
+			}
+		}
+
+		if (Math::abs(cross_z) < CMP_EPSILON) {
+			cross_z += delta_z;
+			if (z_step == -1) {
+				z -= 1;
+			}
+		}
+
+		// Start inside the grid.
+		int x_start = CLAMP(x, 0, width - 2);
+		int z_start = CLAMP(z, 0, depth - 2);
+
+		// Adjust initial cross values.
+		cross_x += delta_x * x_step * (x_start - x);
+		cross_z += delta_z * z_step * (z_start - z);
+
+		x = x_start;
+		z = z_start;
+
+		if (_heightmap_cell_cull_segment(params, x, z)) {
+			r_point = params.result;
+			r_normal = params.normal;
+			return true;
+		}
+
+		real_t dist = 0.0;
+		while (true) {
+			if (cross_x < cross_z) {
+				// X lane.
+				x += x_step;
+				// Assign before advancing the param,
+				// to be in sync with the initialization step.
+				dist = cross_x;
+				cross_x += delta_x;
+			} else {
+				// Z lane.
+				z += z_step;
+				dist = cross_z;
+				cross_z += delta_z;
+			}
+
+			// Stop when outside the grid.
+			if ((x < 0) || (z < 0) || (x >= width - 1) || (z >= depth - 1)) {
+				break;
+			}
+
+			if (_heightmap_cell_cull_segment(params, x, z)) {
+				r_point = params.result;
+				r_normal = params.normal;
+				return true;
+			}
+
+			if (dist > ray_dist_proj) {
+				break;
+			}
+		}
+	}
+
 	return false;
 }
 
@@ -1655,7 +1860,66 @@ Vector3 HeightMapShape3DSW::get_closest_point_to(const Vector3 &p_point) const {
 	return Vector3();
 }
 
+void HeightMapShape3DSW::_get_cell(const Vector3 &p_point, int &r_x, int &r_y, int &r_z) const {
+	// Clamps p_point to the shape AABB offset by local_origin, then rounds each axis into r_x/r_y/r_z.
+	const AABB &aabb = get_aabb();
+	Vector3 pos_local = aabb.position + local_origin;
+	Vector3 clamped_point(p_point);
+	clamped_point.x = CLAMP(p_point.x, pos_local.x, pos_local.x + aabb.size.x);
+	clamped_point.y = CLAMP(p_point.y, pos_local.y, pos_local.y + aabb.size.y);
+	// Each axis is bounded by its own origin plus its own extent (Z must not start from pos_local.x).
+	clamped_point.z = CLAMP(p_point.z, pos_local.z, pos_local.z + aabb.size.z);
+	// Adding +/-0.5 before the implicit int conversion rounds to the nearest integer, ties away from zero.
+	r_x = (clamped_point.x < 0.0) ? (clamped_point.x - 0.5) : (clamped_point.x + 0.5);
+	r_y = (clamped_point.y < 0.0) ? (clamped_point.y - 0.5) : (clamped_point.y + 0.5);
+	r_z = (clamped_point.z < 0.0) ? (clamped_point.z - 0.5) : (clamped_point.z + 0.5);
+}
+
 void HeightMapShape3DSW::cull(const AABB &p_local_aabb, Callback p_callback, void *p_userdata) const {
+	if (heights.is_empty()) {
+		return;
+	}
+
+	AABB local_aabb = p_local_aabb;
+	local_aabb.position += local_origin;
+
+	// Quantize the aabb, and adjust the start/end ranges.
+	int aabb_min[3];
+	int aabb_max[3];
+	_get_cell(local_aabb.position, aabb_min[0], aabb_min[1], aabb_min[2]);
+	_get_cell(local_aabb.position + local_aabb.size, aabb_max[0], aabb_max[1], aabb_max[2]);
+
+	// Expand the min/max quantized values.
+	// This is to catch the case where the input aabb falls between grid points.
+	for (int i = 0; i < 3; ++i) {
+		aabb_min[i]--;
+		aabb_max[i]++;
+	}
+
+	int start_x = MAX(0, aabb_min[0]);
+	int end_x = MIN(width - 1, aabb_max[0]);
+	int start_z = MAX(0, aabb_min[2]);
+	int end_z = MIN(depth - 1, aabb_max[2]);
+
+	FaceShape3DSW face;
+	face.backface_collision = true;
+
+	for (int z = start_z; z < end_z; z++) {
+		for (int x = start_x; x < end_x; x++) {
+			// First triangle.
+			_get_point(x, z, face.vertex[0]);
+			_get_point(x + 1, z, face.vertex[1]);
+			_get_point(x, z + 1, face.vertex[2]);
+			face.normal = Plane(face.vertex[0], face.vertex[2], face.vertex[1]).normal;
+			p_callback(p_userdata, &face);
+
+			// Second triangle.
+			face.vertex[0] = face.vertex[1];
+			_get_point(x + 1, z + 1, face.vertex[1]);
+			face.normal = Plane(face.vertex[0], face.vertex[2], face.vertex[1]).normal;
+			p_callback(p_userdata, &face);
+		}
+	}
 }
 
 Vector3 HeightMapShape3DSW::get_moment_of_inertia(real_t p_mass) const {
@@ -1668,58 +1932,102 @@ Vector3 HeightMapShape3DSW::get_moment_of_inertia(real_t p_mass) const {
 			(p_mass / 3.0) * (extents.x * extents.x + extents.y * extents.y));
 }
 
-void HeightMapShape3DSW::_setup(Vector<real_t> p_heights, int p_width, int p_depth, real_t p_cell_size) {
+void HeightMapShape3DSW::_setup(const Vector<float> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height) {
 	heights = p_heights;
 	width = p_width;
 	depth = p_depth;
-	cell_size = p_cell_size;
-
-	const real_t *r = heights.ptr();
 
+	// Initialize aabb.
 	AABB aabb;
+	aabb.position = Vector3(0.0, p_min_height, 0.0);
+	aabb.size = Vector3(p_width - 1, p_max_height - p_min_height, p_depth - 1);
 
-	for (int i = 0; i < depth; i++) {
-		for (int j = 0; j < width; j++) {
-			real_t h = r[i * width + j];
+	// Initialize origin as the aabb center.
+	local_origin = aabb.position + 0.5 * aabb.size;
+	local_origin.y = 0.0;
 
-			Vector3 pos(j * cell_size, h, i * cell_size);
-			if (i == 0 || j == 0) {
-				aabb.position = pos;
-			} else {
-				aabb.expand_to(pos);
-			}
-		}
-	}
+	aabb.position -= local_origin;
 
 	configure(aabb);
 }
 
 void HeightMapShape3DSW::set_data(const Variant &p_data) {
 	ERR_FAIL_COND(p_data.get_type() != Variant::DICTIONARY);
+	// Required keys: "width", "depth" and "heights"; "min_height"/"max_height" are optional.
 	Dictionary d = p_data;
 	ERR_FAIL_COND(!d.has("width"));
 	ERR_FAIL_COND(!d.has("depth"));
-	ERR_FAIL_COND(!d.has("cell_size"));
 	ERR_FAIL_COND(!d.has("heights"));
 
 	int width = d["width"];
 	int depth = d["depth"];
-	real_t cell_size = d["cell_size"];
-	Vector<real_t> heights = d["heights"];
 
-	ERR_FAIL_COND(width <= 0);
-	ERR_FAIL_COND(depth <= 0);
-	ERR_FAIL_COND(cell_size <= CMP_EPSILON);
-	ERR_FAIL_COND(heights.size() != (width * depth));
-	_setup(heights, width, depth, cell_size);
+	ERR_FAIL_COND(width <= 0);
+	ERR_FAIL_COND(depth <= 0);
+
+	Variant heights_variant = d["heights"];
+	Vector<float> heights_buffer;
+	if (heights_variant.get_type() == Variant::PACKED_FLOAT32_ARRAY) {
+		// A packed float array is used as-is.
+		heights_buffer = heights_variant;
+	} else if (heights_variant.get_type() == Variant::OBJECT) {
+		// An object must be an Image in FORMAT_RF; its pixels are copied into the float buffer.
+		// This would be expensive to do with a script, so it's nice to have it here.
+		Ref<Image> image = heights_variant;
+		ERR_FAIL_COND(image.is_null());
+		ERR_FAIL_COND(image->get_format() != Image::FORMAT_RF);
+
+		PackedByteArray im_data = image->get_data();
+		heights_buffer.resize(image->get_width() * image->get_height());
+
+		float *w = heights_buffer.ptrw();
+		const float *rp = (const float *)im_data.ptr();
+		for (int i = 0; i < heights_buffer.size(); ++i) {
+			w[i] = rp[i];
+		}
+	} else {
+		ERR_FAIL_MSG("Expected PackedFloat32Array or float Image.");
+	}
+
+	// Use the caller-provided height range when both bounds are present, otherwise scan the buffer.
+	real_t min_height = 0.0;
+	real_t max_height = 0.0;
+	if (d.has("min_height") && d.has("max_height")) {
+		min_height = d["min_height"];
+		max_height = d["max_height"];
+	} else {
+		// Scan the heights parsed above: the heights member still holds the previous data here.
+		int heights_size = heights_buffer.size();
+		for (int i = 0; i < heights_size; ++i) {
+			float h = heights_buffer[i];
+			if (h < min_height) {
+				min_height = h;
+			} else if (h > max_height) {
+				max_height = h;
+			}
+		}
+	}
+
+	ERR_FAIL_COND(min_height > max_height);
+	ERR_FAIL_COND(heights_buffer.size() != (width * depth));
+
+	// The validated buffer, dimensions and height range define the shape and its AABB.
+	_setup(heights_buffer, width, depth, min_height, max_height);
 }
 
 Variant HeightMapShape3DSW::get_data() const {
-	ERR_FAIL_V(Variant());
+	Dictionary d;
+	d["width"] = width;
+	d["depth"] = depth;
+
+	const AABB &aabb = get_aabb();
+	d["min_height"] = aabb.position.y;
+	d["max_height"] = aabb.position.y + aabb.size.y;
+
+	d["heights"] = heights;
+
+	return d;
 }
 
 HeightMapShape3DSW::HeightMapShape3DSW() {
-	width = 0;
-	depth = 0;
-	cell_size = 0;
 }
diff --git a/servers/physics_3d/shape_3d_sw.h b/servers/physics_3d/shape_3d_sw.h
index 988e76c699..4d2b6ffbed 100644
--- a/servers/physics_3d/shape_3d_sw.h
+++ b/servers/physics_3d/shape_3d_sw.h
@@ -81,7 +81,7 @@ public:
 
 	virtual PhysicsServer3D::ShapeType get_type() const = 0;
 
-	_FORCE_INLINE_ AABB get_aabb() const { return aabb; }
+	_FORCE_INLINE_ const AABB &get_aabb() const { return aabb; }
 	_FORCE_INLINE_ bool is_configured() const { return configured; }
 
 	virtual bool is_concave() const { return false; }
@@ -389,21 +389,29 @@ public:
 };
 
 struct HeightMapShape3DSW : public ConcaveShape3DSW {
-	Vector<real_t> heights;
-	int width;
-	int depth;
-	real_t cell_size;
+	Vector<float> heights;
+	int width = 0;
+	int depth = 0;
+	Vector3 local_origin;
 
-	//void _cull_segment(int p_idx,_SegmentCullParams *p_params) const;
-	//void _cull(int p_idx,_CullParams *p_params) const;
+	_FORCE_INLINE_ float _get_height(int p_x, int p_z) const {
+		return heights[(p_z * width) + p_x];
+	}
+
+	_FORCE_INLINE_ void _get_point(int p_x, int p_z, Vector3 &r_point) const {
+		r_point.x = p_x - 0.5 * (width - 1.0);
+		r_point.y = _get_height(p_x, p_z);
+		r_point.z = p_z - 0.5 * (depth - 1.0);
+	}
+
+	void _get_cell(const Vector3 &p_point, int &r_x, int &r_y, int &r_z) const;
 
-	void _setup(Vector<real_t> p_heights, int p_width, int p_depth, real_t p_cell_size);
+	void _setup(const Vector<float> &p_heights, int p_width, int p_depth, real_t p_min_height, real_t p_max_height);
 
 public:
-	Vector<real_t> get_heights() const;
+	Vector<float> get_heights() const;
 	int get_width() const;
 	int get_depth() const;
-	real_t get_cell_size() const;
 
 	virtual PhysicsServer3D::ShapeType get_type() const { return PhysicsServer3D::SHAPE_HEIGHTMAP; }
 
diff --git a/servers/physics_3d/step_3d_sw.cpp b/servers/physics_3d/step_3d_sw.cpp
index 2133a38670..06f3227eab 100644
--- a/servers/physics_3d/step_3d_sw.cpp
+++ b/servers/physics_3d/step_3d_sw.cpp
@@ -33,19 +33,23 @@
 
 #include "core/os/os.h"
 
-void Step3DSW::_populate_island(Body3DSW *p_body, Body3DSW **p_island, Constraint3DSW **p_constraint_island) {
+#define BODY_ISLAND_COUNT_RESERVE 128
+#define BODY_ISLAND_SIZE_RESERVE 512
+#define ISLAND_COUNT_RESERVE 128
+#define ISLAND_SIZE_RESERVE 512
+
+void Step3DSW::_populate_island(Body3DSW *p_body, LocalVector<Body3DSW *> &p_body_island, LocalVector<Constraint3DSW *> &p_constraint_island) {
 	p_body->set_island_step(_step);
-	p_body->set_island_next(*p_island);
-	*p_island = p_body;
+	p_body_island.push_back(p_body);
 
-	for (Map<Constraint3DSW *, int>::Element *E = p_body->get_constraint_map().front(); E; E = E->next()) {
+	// Faster with reversed iterations.
+	for (Map<Constraint3DSW *, int>::Element *E = p_body->get_constraint_map().back(); E; E = E->prev()) {
 		Constraint3DSW *c = (Constraint3DSW *)E->key();
 		if (c->get_island_step() == _step) {
 			continue; //already processed
 		}
 		c->set_island_step(_step);
-		c->set_island_next(*p_constraint_island);
-		*p_constraint_island = c;
+		p_constraint_island.push_back(c);
 
 		for (int i = 0; i < c->get_body_count(); i++) {
 			if (i == E->get()) {
@@ -55,87 +59,79 @@ void Step3DSW::_populate_island(Body3DSW *p_body, Body3DSW **p_island, Constrain
 			if (b->get_island_step() == _step || b->get_mode() == PhysicsServer3D::BODY_MODE_STATIC || b->get_mode() == PhysicsServer3D::BODY_MODE_KINEMATIC) {
 				continue; //no go
 			}
-			_populate_island(c->get_body_ptr()[i], p_island, p_constraint_island);
+			_populate_island(c->get_body_ptr()[i], p_body_island, p_constraint_island);
 		}
 	}
 }
 
-void Step3DSW::_setup_island(Constraint3DSW *p_island, real_t p_delta) {
-	Constraint3DSW *ci = p_island;
-	while (ci) {
-		ci->setup(p_delta);
-		//todo remove from island if process fails
-		ci = ci->get_island_next();
+void Step3DSW::_setup_island(LocalVector<Constraint3DSW *> &p_constraint_island, real_t p_delta) {
+	uint32_t constraint_count = p_constraint_island.size();
+	uint32_t valid_constraint_count = 0;
+	for (uint32_t constraint_index = 0; constraint_index < constraint_count; ++constraint_index) {
+		Constraint3DSW *constraint = p_constraint_island[constraint_index];
+		if (p_constraint_island[constraint_index]->setup(p_delta)) {
+			// Keep this constraint for solving.
+			p_constraint_island[valid_constraint_count++] = constraint;
+		}
 	}
+	p_constraint_island.resize(valid_constraint_count);
 }
 
-void Step3DSW::_solve_island(Constraint3DSW *p_island, int p_iterations, real_t p_delta) {
-	int at_priority = 1;
+void Step3DSW::_solve_island(LocalVector<Constraint3DSW *> &p_constraint_island, int p_iterations, real_t p_delta) {
+	int current_priority = 1;
 
-	while (p_island) {
+	uint32_t constraint_count = p_constraint_island.size();
+	while (constraint_count > 0) {
 		for (int i = 0; i < p_iterations; i++) {
-			Constraint3DSW *ci = p_island;
-			while (ci) {
-				ci->solve(p_delta);
-				ci = ci->get_island_next();
+			// Go through all iterations.
+			for (uint32_t constraint_index = 0; constraint_index < constraint_count; ++constraint_index) {
+				p_constraint_island[constraint_index]->solve(p_delta);
 			}
 		}
 
-		at_priority++;
-
-		{
-			Constraint3DSW *ci = p_island;
-			Constraint3DSW *prev = nullptr;
-			while (ci) {
-				if (ci->get_priority() < at_priority) {
-					if (prev) {
-						prev->set_island_next(ci->get_island_next()); //remove
-					} else {
-						p_island = ci->get_island_next();
-					}
-				} else {
-					prev = ci;
-				}
-
-				ci = ci->get_island_next();
+		// Check priority to keep only higher priority constraints.
+		uint32_t priority_constraint_count = 0;
+		++current_priority;
+		for (uint32_t constraint_index = 0; constraint_index < constraint_count; ++constraint_index) {
+			Constraint3DSW *constraint = p_constraint_island[constraint_index];
+			if (constraint->get_priority() >= current_priority) {
+				// Keep this constraint for the next iteration.
+				p_constraint_island[priority_constraint_count++] = constraint;
 			}
 		}
+		constraint_count = priority_constraint_count;
 	}
 }
 
-void Step3DSW::_check_suspend(Body3DSW *p_island, real_t p_delta) {
+void Step3DSW::_check_suspend(const LocalVector<Body3DSW *> &p_body_island, real_t p_delta) {
 	bool can_sleep = true;
 
-	Body3DSW *b = p_island;
-	while (b) {
-		if (b->get_mode() == PhysicsServer3D::BODY_MODE_STATIC || b->get_mode() == PhysicsServer3D::BODY_MODE_KINEMATIC) {
-			b = b->get_island_next();
-			continue; //ignore for static
+	uint32_t body_count = p_body_island.size();
+	for (uint32_t body_index = 0; body_index < body_count; ++body_index) {
+		Body3DSW *body = p_body_island[body_index];
+
+		if (body->get_mode() == PhysicsServer3D::BODY_MODE_STATIC || body->get_mode() == PhysicsServer3D::BODY_MODE_KINEMATIC) {
+			continue; // Ignore for static.
 		}
 
-		if (!b->sleep_test(p_delta)) {
+		if (!body->sleep_test(p_delta)) {
 			can_sleep = false;
 		}
-
-		b = b->get_island_next();
 	}
 
-	//put all to sleep or wake up everyoen
+	// Put all to sleep or wake up everyone.
+	for (uint32_t body_index = 0; body_index < body_count; ++body_index) {
+		Body3DSW *body = p_body_island[body_index];
 
-	b = p_island;
-	while (b) {
-		if (b->get_mode() == PhysicsServer3D::BODY_MODE_STATIC || b->get_mode() == PhysicsServer3D::BODY_MODE_KINEMATIC) {
-			b = b->get_island_next();
-			continue; //ignore for static
+		if (body->get_mode() == PhysicsServer3D::BODY_MODE_STATIC || body->get_mode() == PhysicsServer3D::BODY_MODE_KINEMATIC) {
+			continue; // Ignore for static.
 		}
 
-		bool active = b->is_active();
+		bool active = body->is_active();
 
 		if (active == can_sleep) {
-			b->set_active(!can_sleep);
+			body->set_active(!can_sleep);
 		}
-
-		b = b->get_island_next();
 	}
 }
 
@@ -181,33 +177,43 @@ void Step3DSW::step(Space3DSW *p_space, real_t p_delta, int p_iterations) {
 
 	/* GENERATE CONSTRAINT ISLANDS */
 
-	Body3DSW *island_list = nullptr;
-	Constraint3DSW *constraint_island_list = nullptr;
 	b = body_list->first();
 
-	int island_count = 0;
+	uint32_t body_island_count = 0;
+	uint32_t island_count = 0;
 
 	while (b) {
 		Body3DSW *body = b->self();
 
 		if (body->get_island_step() != _step) {
-			Body3DSW *island = nullptr;
-			Constraint3DSW *constraint_island = nullptr;
-			_populate_island(body, &island, &constraint_island);
+			++body_island_count;
+			if (body_islands.size() < body_island_count) {
+				body_islands.resize(body_island_count);
+			}
+			LocalVector<Body3DSW *> &body_island = body_islands[body_island_count - 1];
+			body_island.clear();
+			body_island.reserve(BODY_ISLAND_SIZE_RESERVE);
 
-			island->set_island_list_next(island_list);
-			island_list = island;
+			++island_count;
+			if (constraint_islands.size() < island_count) {
+				constraint_islands.resize(island_count);
+			}
+			LocalVector<Constraint3DSW *> &constraint_island = constraint_islands[island_count - 1];
+			constraint_island.clear();
+			constraint_island.reserve(ISLAND_SIZE_RESERVE);
 
-			if (constraint_island) {
-				constraint_island->set_island_list_next(constraint_island_list);
-				constraint_island_list = constraint_island;
-				island_count++;
+			_populate_island(body, body_island, constraint_island);
+
+			// body_island is a reference into body_islands, so it is already stored; no push_back needed.
+			// An island that ended up without constraints gives its reserved constraint slot back.
+			if (constraint_island.is_empty()) {
+				--island_count;
 			}
 		}
 		b = b->next();
 	}
 
-	p_space->set_island_count(island_count);
+	p_space->set_island_count((int)island_count);
 
 	const SelfList<Area3DSW>::List &aml = p_space->get_moved_area_list();
 
@@ -218,9 +224,13 @@ void Step3DSW::step(Space3DSW *p_space, real_t p_delta, int p_iterations) {
 				continue;
 			}
 			c->set_island_step(_step);
-			c->set_island_next(nullptr);
-			c->set_island_list_next(constraint_island_list);
-			constraint_island_list = c;
+			++island_count;
+			if (constraint_islands.size() < island_count) {
+				constraint_islands.resize(island_count);
+			}
+			LocalVector<Constraint3DSW *> &constraint_island = constraint_islands[island_count - 1];
+			constraint_island.clear();
+			constraint_island.push_back(c);
 		}
 		p_space->area_remove_from_moved_list((SelfList<Area3DSW> *)aml.first()); //faster to remove here
 	}
@@ -233,9 +243,13 @@ void Step3DSW::step(Space3DSW *p_space, real_t p_delta, int p_iterations) {
 				continue;
 			}
 			c->set_island_step(_step);
-			c->set_island_next(nullptr);
-			c->set_island_list_next(constraint_island_list);
-			constraint_island_list = c;
+			++island_count;
+			if (constraint_islands.size() < island_count) {
+				constraint_islands.resize(island_count);
+			}
+			LocalVector<Constraint3DSW *> &constraint_island = constraint_islands[island_count - 1];
+			constraint_island.clear();
+			constraint_island.push_back(c);
 		}
 		sb = sb->next();
 	}
@@ -248,12 +262,8 @@ void Step3DSW::step(Space3DSW *p_space, real_t p_delta, int p_iterations) {
 
 	/* SETUP CONSTRAINT ISLANDS */
 
-	{
-		Constraint3DSW *ci = constraint_island_list;
-		while (ci) {
-			_setup_island(ci, p_delta);
-			ci = ci->get_island_list_next();
-		}
+	for (uint32_t island_index = 0; island_index < island_count; ++island_index) {
+		_setup_island(constraint_islands[island_index], p_delta);
 	}
 
 	{ //profile
@@ -264,13 +274,10 @@ void Step3DSW::step(Space3DSW *p_space, real_t p_delta, int p_iterations) {
 
 	/* SOLVE CONSTRAINT ISLANDS */
 
-	{
-		Constraint3DSW *ci = constraint_island_list;
-		while (ci) {
-			//iterating each island separatedly improves cache efficiency
-			_solve_island(ci, p_iterations, p_delta);
-			ci = ci->get_island_list_next();
-		}
+	for (uint32_t island_index = 0; island_index < island_count; ++island_index) {
+		// Warning: _solve_island modifies the constraint islands for optimization purpose,
+		// their content is not reliable after these calls and shouldn't be used anymore.
+		_solve_island(constraint_islands[island_index], p_iterations, p_delta);
 	}
 
 	{ //profile
@@ -290,12 +297,8 @@ void Step3DSW::step(Space3DSW *p_space, real_t p_delta, int p_iterations) {
 
 	/* SLEEP / WAKE UP ISLANDS */
 
-	{
-		Body3DSW *bi = island_list;
-		while (bi) {
-			_check_suspend(bi, p_delta);
-			bi = bi->get_island_list_next();
-		}
+	for (uint32_t island_index = 0; island_index < body_island_count; ++island_index) {
+		_check_suspend(body_islands[island_index], p_delta);
 	}
 
 	/* UPDATE SOFT BODY CONSTRAINTS */
@@ -319,4 +322,7 @@ void Step3DSW::step(Space3DSW *p_space, real_t p_delta, int p_iterations) {
 
 Step3DSW::Step3DSW() {
 	_step = 1;
+
+	body_islands.reserve(BODY_ISLAND_COUNT_RESERVE);
+	constraint_islands.reserve(ISLAND_COUNT_RESERVE);
 }
diff --git a/servers/physics_3d/step_3d_sw.h b/servers/physics_3d/step_3d_sw.h
index 55c48ec0eb..f406c35c3a 100644
--- a/servers/physics_3d/step_3d_sw.h
+++ b/servers/physics_3d/step_3d_sw.h
@@ -33,13 +33,18 @@
 
 #include "space_3d_sw.h"
 
+#include "core/templates/local_vector.h"
+
 class Step3DSW {
 	uint64_t _step;
 
-	void _populate_island(Body3DSW *p_body, Body3DSW **p_island, Constraint3DSW **p_constraint_island);
-	void _setup_island(Constraint3DSW *p_island, real_t p_delta);
-	void _solve_island(Constraint3DSW *p_island, int p_iterations, real_t p_delta);
-	void _check_suspend(Body3DSW *p_island, real_t p_delta);
+	LocalVector<LocalVector<Body3DSW *>> body_islands;
+	LocalVector<LocalVector<Constraint3DSW *>> constraint_islands;
+
+	void _populate_island(Body3DSW *p_body, LocalVector<Body3DSW *> &p_body_island, LocalVector<Constraint3DSW *> &p_constraint_island);
+	void _setup_island(LocalVector<Constraint3DSW *> &p_constraint_island, real_t p_delta);
+	void _solve_island(LocalVector<Constraint3DSW *> &p_constraint_island, int p_iterations, real_t p_delta);
+	void _check_suspend(const LocalVector<Body3DSW *> &p_body_island, real_t p_delta);
 
 public:
 	void step(Space3DSW *p_space, real_t p_delta, int p_iterations);
diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp
index 45f6384b5e..15982b4b29 100644
--- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp
+++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp
@@ -77,6 +77,9 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) {
 	int depth_drawi = DEPTH_DRAW_OPAQUE;
 
 	ShaderCompilerRD::IdentifierActions actions;
+	actions.entry_point_stages["vertex"] = ShaderCompilerRD::STAGE_VERTEX;
+	actions.entry_point_stages["fragment"] = ShaderCompilerRD::STAGE_FRAGMENT;
+	actions.entry_point_stages["light"] = ShaderCompilerRD::STAGE_FRAGMENT;
 
 	actions.render_mode_values["blend_add"] = Pair<int *, int>(&blend_mode, BLEND_MODE_ADD);
 	actions.render_mode_values["blend_mix"] = Pair<int *, int>(&blend_mode, BLEND_MODE_MIX);
@@ -148,7 +151,7 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) {
 	print_line("\n**fragment_code:\n" + gen_code.fragment);
 	print_line("\n**light_code:\n" + gen_code.light);
 #endif
-	shader_singleton->shader.version_set_code(version, gen_code.uniforms, gen_code.vertex_global, gen_code.vertex, gen_code.fragment_global, gen_code.light, gen_code.fragment, gen_code.defines);
+	shader_singleton->shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompilerRD::STAGE_VERTEX], gen_code.stage_globals[ShaderCompilerRD::STAGE_FRAGMENT], gen_code.defines);
 	ERR_FAIL_COND(!shader_singleton->shader.version_is_valid(version));
 
 	ubo_size = gen_code.uniform_total_size;
diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp
index 7d6e2fa8e4..3c76c91a67 100644
--- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp
+++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp
@@ -2012,6 +2012,9 @@ void RendererCanvasRenderRD::ShaderData::set_code(const String &p_code) {
 	uses_screen_texture = false;
 
 	ShaderCompilerRD::IdentifierActions actions;
+	actions.entry_point_stages["vertex"] = ShaderCompilerRD::STAGE_VERTEX;
+	actions.entry_point_stages["fragment"] = ShaderCompilerRD::STAGE_FRAGMENT;
+	actions.entry_point_stages["light"] = ShaderCompilerRD::STAGE_FRAGMENT;
 
 	actions.render_mode_values["blend_add"] = Pair<int *, int>(&blend_mode, BLEND_MODE_ADD);
 	actions.render_mode_values["blend_mix"] = Pair<int *, int>(&blend_mode, BLEND_MODE_MIX);
@@ -2048,7 +2051,7 @@ void RendererCanvasRenderRD::ShaderData::set_code(const String &p_code) {
 	print_line("\n**fragment_code:\n" + gen_code.fragment);
 	print_line("\n**light_code:\n" + gen_code.light);
 #endif
-	canvas_singleton->shader.canvas_shader.version_set_code(version, gen_code.uniforms, gen_code.vertex_global, gen_code.vertex, gen_code.fragment_global, gen_code.light, gen_code.fragment, gen_code.defines);
+	canvas_singleton->shader.canvas_shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompilerRD::STAGE_VERTEX], gen_code.stage_globals[ShaderCompilerRD::STAGE_FRAGMENT], gen_code.defines);
 	ERR_FAIL_COND(!canvas_singleton->shader.canvas_shader.version_is_valid(version));
 
 	ubo_size = gen_code.uniform_total_size;
diff --git a/servers/rendering/renderer_rd/renderer_scene_sky_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_sky_rd.cpp
index 769335ac16..54c6e81110 100644
--- a/servers/rendering/renderer_rd/renderer_scene_sky_rd.cpp
+++ b/servers/rendering/renderer_rd/renderer_scene_sky_rd.cpp
@@ -50,6 +50,7 @@ void RendererSceneSkyRD::SkyShaderData::set_code(const String &p_code) {
 
 	ShaderCompilerRD::GeneratedCode gen_code;
 	ShaderCompilerRD::IdentifierActions actions;
+	actions.entry_point_stages["sky"] = ShaderCompilerRD::STAGE_FRAGMENT;
 
 	uses_time = false;
 	uses_half_res = false;
@@ -110,7 +111,7 @@ void RendererSceneSkyRD::SkyShaderData::set_code(const String &p_code) {
 	print_line("\n**light_code:\n" + gen_code.light);
 #endif
 
-	scene_singleton->sky.sky_shader.shader.version_set_code(version, gen_code.uniforms, gen_code.vertex_global, gen_code.vertex, gen_code.fragment_global, gen_code.light, gen_code.fragment, gen_code.defines);
+	scene_singleton->sky.sky_shader.shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompilerRD::STAGE_VERTEX], gen_code.stage_globals[ShaderCompilerRD::STAGE_FRAGMENT], gen_code.defines);
 	ERR_FAIL_COND(!scene_singleton->sky.sky_shader.shader.version_is_valid(version));
 
 	ubo_size = gen_code.uniform_total_size;
@@ -759,7 +760,7 @@ void RendererSceneSkyRD::init(RendererStorageRD *p_storage) {
 		sky_shader.default_shader = storage->shader_allocate();
 		storage->shader_initialize(sky_shader.default_shader);
 
-		storage->shader_set_code(sky_shader.default_shader, "shader_type sky; void fragment() { COLOR = vec3(0.0); } \n");
+		storage->shader_set_code(sky_shader.default_shader, "shader_type sky; void sky() { COLOR = vec3(0.0); } \n");
 
 		sky_shader.default_material = storage->material_allocate();
 		storage->material_initialize(sky_shader.default_material);
@@ -840,7 +841,7 @@ void RendererSceneSkyRD::init(RendererStorageRD *p_storage) {
 		sky_scene_state.fog_shader = storage->shader_allocate();
 		storage->shader_initialize(sky_scene_state.fog_shader);
 
-		storage->shader_set_code(sky_scene_state.fog_shader, "shader_type sky; uniform vec4 clear_color; void fragment() { COLOR = clear_color.rgb; } \n");
+		storage->shader_set_code(sky_scene_state.fog_shader, "shader_type sky; uniform vec4 clear_color; void sky() { COLOR = clear_color.rgb; } \n");
 		sky_scene_state.fog_material = storage->material_allocate();
 		storage->material_initialize(sky_scene_state.fog_material);
 
diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp
index b984f850a0..189c5782f4 100644
--- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp
+++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp
@@ -4781,6 +4781,8 @@ void RendererStorageRD::ParticlesShaderData::set_code(const String &p_code) {
 
 	ShaderCompilerRD::GeneratedCode gen_code;
 	ShaderCompilerRD::IdentifierActions actions;
+	actions.entry_point_stages["start"] = ShaderCompilerRD::STAGE_COMPUTE;
+	actions.entry_point_stages["process"] = ShaderCompilerRD::STAGE_COMPUTE;
 
 	/*
 	uses_time = false;
@@ -4801,7 +4803,7 @@ void RendererStorageRD::ParticlesShaderData::set_code(const String &p_code) {
 		version = base_singleton->particles_shader.shader.version_create();
 	}
 
-	base_singleton->particles_shader.shader.version_set_compute_code(version, gen_code.uniforms, gen_code.compute_global, gen_code.compute, gen_code.defines);
+	base_singleton->particles_shader.shader.version_set_compute_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompilerRD::STAGE_COMPUTE], gen_code.defines);
 	ERR_FAIL_COND(!base_singleton->particles_shader.shader.version_is_valid(version));
 
 	ubo_size = gen_code.uniform_total_size;
@@ -8824,7 +8826,6 @@ RendererStorageRD::RendererStorageRD() {
 		sdf_versions.push_back(""); //one only
 		giprobe_sdf_shader.initialize(sdf_versions);
 		giprobe_sdf_shader_version = giprobe_sdf_shader.version_create();
-		giprobe_sdf_shader.version_set_compute_code(giprobe_sdf_shader_version, "", "", "", Vector<String>());
 		giprobe_sdf_shader_version_shader = giprobe_sdf_shader.version_get_shader(giprobe_sdf_shader_version, 0);
 		giprobe_sdf_shader_pipeline = RD::get_singleton()->compute_pipeline_create(giprobe_sdf_shader_version_shader);
 	}
@@ -8913,7 +8914,7 @@ RendererStorageRD::RendererStorageRD() {
 		// default material and shader for particles shader
 		particles_shader.default_shader = shader_allocate();
 		shader_initialize(particles_shader.default_shader);
-		shader_set_code(particles_shader.default_shader, "shader_type particles; void compute() { COLOR = vec4(1.0); } \n");
+		shader_set_code(particles_shader.default_shader, "shader_type particles; void process() { COLOR = vec4(1.0); } \n");
 		particles_shader.default_material = material_allocate();
 		material_initialize(particles_shader.default_material);
 		material_set_shader(particles_shader.default_material, particles_shader.default_shader);
diff --git a/servers/rendering/renderer_rd/shader_compiler_rd.cpp b/servers/rendering/renderer_rd/shader_compiler_rd.cpp
index 8135d388e1..24ac85bb35 100644
--- a/servers/rendering/renderer_rd/shader_compiler_rd.cpp
+++ b/servers/rendering/renderer_rd/shader_compiler_rd.cpp
@@ -535,9 +535,9 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 				struct_code += "}";
 				struct_code += ";\n";
 
-				r_gen_code.vertex_global += struct_code;
-				r_gen_code.fragment_global += struct_code;
-				r_gen_code.compute_global += struct_code;
+				for (int j = 0; j < STAGE_MAX; j++) {
+					r_gen_code.stage_globals[j] += struct_code;
+				}
 			}
 
 			int max_texture_uniforms = 0;
@@ -590,9 +590,9 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 				ucode += " " + _mkid(E->key());
 				ucode += ";\n";
 				if (SL::is_sampler_type(E->get().type)) {
-					r_gen_code.vertex_global += ucode;
-					r_gen_code.fragment_global += ucode;
-					r_gen_code.compute_global += ucode;
+					for (int j = 0; j < STAGE_MAX; j++) {
+						r_gen_code.stage_globals[j] += ucode;
+					}
 
 					GeneratedCode::Texture texture;
 					texture.name = E->key();
@@ -608,7 +608,6 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 					r_gen_code.texture_uniforms.write[E->get().texture_order] = texture;
 				} else {
 					if (!uses_uniforms) {
-						r_gen_code.defines.push_back(String("#define USE_MATERIAL_UNIFORMS\n"));
 						uses_uniforms = true;
 					}
 					uniform_defines.write[E->get().order] = ucode;
@@ -707,9 +706,10 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 					vcode += "]";
 				}
 				vcode += ";\n";
-				r_gen_code.vertex_global += "layout(location=" + itos(index) + ") " + interp_mode + "out " + vcode;
-				r_gen_code.fragment_global += "layout(location=" + itos(index) + ") " + interp_mode + "in " + vcode;
-				r_gen_code.compute_global += "layout(location=" + itos(index) + ") " + interp_mode + "out " + vcode;
+
+				r_gen_code.stage_globals[STAGE_VERTEX] += "layout(location=" + itos(index) + ") " + interp_mode + "out " + vcode;
+				r_gen_code.stage_globals[STAGE_FRAGMENT] += "layout(location=" + itos(index) + ") " + interp_mode + "in " + vcode;
+
 				index++;
 			}
 
@@ -725,7 +725,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 					gcode += ";\n";
 				}
 				gcode += "} frag_to_light;\n";
-				r_gen_code.fragment_global += gcode;
+				r_gen_code.stage_globals[STAGE_FRAGMENT] += gcode;
 			}
 
 			for (int i = 0; i < pnode->vconstants.size(); i++) {
@@ -747,9 +747,9 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 				gcode += "=";
 				gcode += _dump_node_code(cnode.initializer, p_level, r_gen_code, p_actions, p_default_actions, p_assigning);
 				gcode += ";\n";
-				r_gen_code.vertex_global += gcode;
-				r_gen_code.fragment_global += gcode;
-				r_gen_code.compute_global += gcode;
+				for (int j = 0; j < STAGE_MAX; j++) {
+					r_gen_code.stage_globals[j] += gcode;
+				}
 			}
 
 			Map<StringName, String> function_code;
@@ -765,9 +765,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 
 			//place functions in actual code
 
-			Set<StringName> added_vtx;
-			Set<StringName> added_fragment; //share for light
-			Set<StringName> added_compute; //share for light
+			Set<StringName> added_funcs_per_stage[STAGE_MAX];
 
 			for (int i = 0; i < pnode->functions.size(); i++) {
 				SL::FunctionNode *fnode = pnode->functions[i].function;
@@ -776,24 +774,10 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 
 				current_func_name = fnode->name;
 
-				if (fnode->name == vertex_name) {
-					_dump_function_deps(pnode, fnode->name, function_code, r_gen_code.vertex_global, added_vtx);
-					r_gen_code.vertex = function_code[vertex_name];
-				}
-
-				if (fnode->name == fragment_name) {
-					_dump_function_deps(pnode, fnode->name, function_code, r_gen_code.fragment_global, added_fragment);
-					r_gen_code.fragment = function_code[fragment_name];
-				}
-
-				if (fnode->name == light_name) {
-					_dump_function_deps(pnode, fnode->name, function_code, r_gen_code.fragment_global, added_fragment);
-					r_gen_code.light = function_code[light_name];
-				}
-
-				if (fnode->name == compute_name) {
-					_dump_function_deps(pnode, fnode->name, function_code, r_gen_code.compute_global, added_compute);
-					r_gen_code.compute = function_code[compute_name];
+				if (p_actions.entry_point_stages.has(fnode->name)) {
+					Stage stage = p_actions.entry_point_stages[fnode->name];
+					_dump_function_deps(pnode, fnode->name, function_code, r_gen_code.stage_globals[stage], added_funcs_per_stage[stage]);
+					r_gen_code.code[fnode->name] = function_code[fnode->name];
 				}
 
 				function = nullptr;
@@ -858,7 +842,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 			SL::VariableNode *vnode = (SL::VariableNode *)p_node;
 			bool use_fragment_varying = false;
 
-			if (current_func_name != vertex_name) {
+			if (!(p_actions.entry_point_stages.has(current_func_name) && p_actions.entry_point_stages[current_func_name] == STAGE_VERTEX)) {
 				if (p_assigning) {
 					if (shader->varyings.has(vnode->name)) {
 						use_fragment_varying = true;
@@ -921,10 +905,10 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 			}
 
 			if (vnode->name == time_name) {
-				if (current_func_name == vertex_name) {
+				if (p_actions.entry_point_stages.has(current_func_name) && p_actions.entry_point_stages[current_func_name] == STAGE_VERTEX) {
 					r_gen_code.uses_vertex_time = true;
 				}
-				if (current_func_name == fragment_name || current_func_name == light_name) {
+				if (p_actions.entry_point_stages.has(current_func_name) && p_actions.entry_point_stages[current_func_name] == STAGE_FRAGMENT) {
 					r_gen_code.uses_fragment_time = true;
 				}
 			}
@@ -1003,7 +987,7 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 			SL::ArrayNode *anode = (SL::ArrayNode *)p_node;
 			bool use_fragment_varying = false;
 
-			if (current_func_name != vertex_name) {
+			if (!(p_actions.entry_point_stages.has(current_func_name) && p_actions.entry_point_stages[current_func_name] == STAGE_VERTEX)) {
 				if (anode->assign_expression != nullptr) {
 					use_fragment_varying = true;
 				} else {
@@ -1059,10 +1043,10 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge
 			}
 
 			if (anode->name == time_name) {
-				if (current_func_name == vertex_name) {
+				if (p_actions.entry_point_stages.has(current_func_name) && p_actions.entry_point_stages[current_func_name] == STAGE_VERTEX) {
 					r_gen_code.uses_vertex_time = true;
 				}
-				if (current_func_name == fragment_name || current_func_name == light_name) {
+				if (p_actions.entry_point_stages.has(current_func_name) && p_actions.entry_point_stages[current_func_name] == STAGE_FRAGMENT) {
 					r_gen_code.uses_fragment_time = true;
 				}
 			}
@@ -1309,7 +1293,7 @@ ShaderLanguage::DataType ShaderCompilerRD::_get_variable_type(const StringName &
 }
 
 Error ShaderCompilerRD::compile(RS::ShaderMode p_mode, const String &p_code, IdentifierActions *p_actions, const String &p_path, GeneratedCode &r_gen_code) {
-	Error err = parser.compile(p_code, ShaderTypes::get_singleton()->get_functions(p_mode), ShaderTypes::get_singleton()->get_modes(p_mode), ShaderTypes::get_singleton()->get_types(), _get_variable_type);
+	Error err = parser.compile(p_code, ShaderTypes::get_singleton()->get_functions(p_mode), ShaderTypes::get_singleton()->get_modes(p_mode), ShaderLanguage::VaryingFunctionNames(), ShaderTypes::get_singleton()->get_types(), _get_variable_type);
 
 	if (err != OK) {
 		Vector<String> shader = p_code.split("\n");
@@ -1322,13 +1306,10 @@ Error ShaderCompilerRD::compile(RS::ShaderMode p_mode, const String &p_code, Ide
 	}
 
 	r_gen_code.defines.clear();
-	r_gen_code.vertex = String();
-	r_gen_code.vertex_global = String();
-	r_gen_code.fragment = String();
-	r_gen_code.fragment_global = String();
-	r_gen_code.compute = String();
-	r_gen_code.compute_global = String();
-	r_gen_code.light = String();
+	r_gen_code.code.clear();
+	for (int i = 0; i < STAGE_MAX; i++) {
+		r_gen_code.stage_globals[i] = String();
+	}
 	r_gen_code.uses_fragment_time = false;
 	r_gen_code.uses_vertex_time = false;
 	r_gen_code.uses_global_textures = false;
@@ -1348,10 +1329,6 @@ Error ShaderCompilerRD::compile(RS::ShaderMode p_mode, const String &p_code, Ide
 void ShaderCompilerRD::initialize(DefaultIdentifierActions p_actions) {
 	actions = p_actions;
 
-	vertex_name = "vertex";
-	fragment_name = "fragment";
-	compute_name = "compute";
-	light_name = "light";
 	time_name = "TIME";
 
 	List<String> func_list;
diff --git a/servers/rendering/renderer_rd/shader_compiler_rd.h b/servers/rendering/renderer_rd/shader_compiler_rd.h
index 6575829e73..2da127ffa3 100644
--- a/servers/rendering/renderer_rd/shader_compiler_rd.h
+++ b/servers/rendering/renderer_rd/shader_compiler_rd.h
@@ -38,7 +38,16 @@
 
 class ShaderCompilerRD {
 public:
+	enum Stage { // Shader stages an entry point can belong to; values index per-stage arrays such as GeneratedCode::stage_globals.
+		STAGE_VERTEX,
+		STAGE_FRAGMENT,
+		STAGE_COMPUTE,
+		STAGE_MAX // Number of stages; used as array size and loop bound, not a real stage.
+	};
+
 	struct IdentifierActions {
+		Map<StringName, Stage> entry_point_stages;
+
 		Map<StringName, Pair<int *, int>> render_mode_values;
 		Map<StringName, bool *> render_mode_flags;
 		Map<StringName, bool *> usage_flag_pointers;
@@ -63,13 +72,9 @@ public:
 		Vector<uint32_t> uniform_offsets;
 		uint32_t uniform_total_size;
 		String uniforms;
-		String vertex_global;
-		String vertex;
-		String fragment_global;
-		String fragment;
-		String light;
-		String compute_global;
-		String compute;
+		String stage_globals[STAGE_MAX];
+
+		Map<String, String> code;
 
 		bool uses_global_textures;
 		bool uses_fragment_time;
@@ -103,10 +108,6 @@ private:
 	const ShaderLanguage::ShaderNode *shader;
 	const ShaderLanguage::FunctionNode *function;
 	StringName current_func_name;
-	StringName vertex_name;
-	StringName fragment_name;
-	StringName light_name;
-	StringName compute_name;
 	StringName time_name;
 	Set<StringName> texture_functions;
 
diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp
index e4a39ff813..f7242a2b17 100644
--- a/servers/rendering/renderer_rd/shader_rd.cpp
+++ b/servers/rendering/renderer_rd/shader_rd.cpp
@@ -30,146 +30,83 @@
 
 #include "shader_rd.h"
 
-#include "core/string/string_builder.h"
 #include "renderer_compositor_rd.h"
 #include "servers/rendering/rendering_device.h"
 
-void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name) {
-	name = p_name;
-	//split vertex and shader code (thank you, shader compiler programmers from you know what company).
-	if (p_vertex_code) {
-		String defines_tag = "\nVERSION_DEFINES";
-		String globals_tag = "\nVERTEX_SHADER_GLOBALS";
-		String material_tag = "\nMATERIAL_UNIFORMS";
-		String code_tag = "\nVERTEX_SHADER_CODE";
-		String code = p_vertex_code;
-
-		int cpos = code.find(defines_tag);
-		if (cpos != -1) {
-			vertex_codev = code.substr(0, cpos).ascii();
-			code = code.substr(cpos + defines_tag.length(), code.length());
-		}
-
-		cpos = code.find(material_tag);
-
-		if (cpos == -1) {
-			vertex_code0 = code.ascii();
-		} else {
-			vertex_code0 = code.substr(0, cpos).ascii();
-			code = code.substr(cpos + material_tag.length(), code.length());
-
-			cpos = code.find(globals_tag);
-
-			if (cpos == -1) {
-				vertex_code1 = code.ascii();
-			} else {
-				vertex_code1 = code.substr(0, cpos).ascii();
-				String code2 = code.substr(cpos + globals_tag.length(), code.length());
-
-				cpos = code2.find(code_tag);
-				if (cpos == -1) {
-					vertex_code2 = code2.ascii();
-				} else {
-					vertex_code2 = code2.substr(0, cpos).ascii();
-					vertex_code3 = code2.substr(cpos + code_tag.length(), code2.length()).ascii();
+void ShaderRD::_add_stage(const char *p_code, StageType p_stage_type) { // Splits one stage's template source into ordered text/marker chunks appended to stage_templates[p_stage_type].
+	Vector<String> lines = String(p_code).split("\n");
+
+	String text; // Plain (non-marker) lines accumulated since the last marker chunk.
+
+	for (int i = 0; i < lines.size(); i++) {
+		String l = lines[i];
+		bool push_chunk = false;
+
+		StageTemplate::Chunk chunk;
+
+		if (l.begins_with("#VERSION_DEFINES")) { // Markers are recognized only at the very start of a line; the marker line itself is not kept as text.
+			chunk.type = StageTemplate::Chunk::TYPE_VERSION_DEFINES;
+			push_chunk = true;
+		} else if (l.begins_with("#GLOBALS")) { // Same marker text for every stage; the chunk type records which stage's globals get inserted.
+			switch (p_stage_type) {
+				case STAGE_TYPE_VERTEX:
+					chunk.type = StageTemplate::Chunk::TYPE_VERTEX_GLOBALS;
+					break;
+				case STAGE_TYPE_FRAGMENT:
+					chunk.type = StageTemplate::Chunk::TYPE_FRAGMENT_GLOBALS;
+					break;
+				case STAGE_TYPE_COMPUTE:
+					chunk.type = StageTemplate::Chunk::TYPE_COMPUTE_GLOBALS;
+					break;
+				default: { // NOTE(review): chunk.type is not assigned on this path, yet the chunk is still pushed below - confirm Chunk has a safe default.
 				}
 			}
-		}
-	}
 
-	if (p_fragment_code) {
-		String defines_tag = "\nVERSION_DEFINES";
-		String globals_tag = "\nFRAGMENT_SHADER_GLOBALS";
-		String material_tag = "\nMATERIAL_UNIFORMS";
-		String code_tag = "\nFRAGMENT_SHADER_CODE";
-		String light_code_tag = "\nLIGHT_SHADER_CODE";
-		String code = p_fragment_code;
-
-		int cpos = code.find(defines_tag);
-		if (cpos != -1) {
-			fragment_codev = code.substr(0, cpos).ascii();
-			code = code.substr(cpos + defines_tag.length(), code.length());
+			push_chunk = true;
+		} else if (l.begins_with("#MATERIAL_UNIFORMS")) {
+			chunk.type = StageTemplate::Chunk::TYPE_MATERIAL_UNIFORMS;
+			push_chunk = true;
+		} else if (l.begins_with("#CODE")) {
+			chunk.type = StageTemplate::Chunk::TYPE_CODE;
+			push_chunk = true;
+			chunk.code = l.replace_first("#CODE", String()).replace(":", "").strip_edges().to_upper(); // Section name: rest of the line with every ':' removed, edges stripped, upper-cased (e.g. "#CODE : vertex" -> "VERTEX").
+		} else {
+			text += l + "\n";
 		}
 
-		cpos = code.find(material_tag);
-		if (cpos == -1) {
-			fragment_code0 = code.ascii();
-		} else {
-			fragment_code0 = code.substr(0, cpos).ascii();
-			//print_line("CODE0:\n"+String(fragment_code0.get_data()));
-			code = code.substr(cpos + material_tag.length(), code.length());
-			cpos = code.find(globals_tag);
-
-			if (cpos == -1) {
-				fragment_code1 = code.ascii();
-			} else {
-				fragment_code1 = code.substr(0, cpos).ascii();
-				//print_line("CODE1:\n"+String(fragment_code1.get_data()));
-
-				String code2 = code.substr(cpos + globals_tag.length(), code.length());
-				cpos = code2.find(light_code_tag);
-
-				if (cpos == -1) {
-					fragment_code2 = code2.ascii();
-				} else {
-					fragment_code2 = code2.substr(0, cpos).ascii();
-					//print_line("CODE2:\n"+String(fragment_code2.get_data()));
-
-					String code3 = code2.substr(cpos + light_code_tag.length(), code2.length());
-
-					cpos = code3.find(code_tag);
-					if (cpos == -1) {
-						fragment_code3 = code3.ascii();
-					} else {
-						fragment_code3 = code3.substr(0, cpos).ascii();
-						//print_line("CODE3:\n"+String(fragment_code3.get_data()));
-						fragment_code4 = code3.substr(cpos + code_tag.length(), code3.length()).ascii();
-						//print_line("CODE4:\n"+String(fragment_code4.get_data()));
-					}
-				}
+		if (push_chunk) { // Flush pending text before the marker chunk so chunk order matches template line order.
+			if (text != String()) {
+				StageTemplate::Chunk text_chunk;
+				text_chunk.type = StageTemplate::Chunk::TYPE_TEXT;
+				text_chunk.text = text.utf8();
+				stage_templates[p_stage_type].chunks.push_back(text_chunk);
+				text = String();
 			}
+			stage_templates[p_stage_type].chunks.push_back(chunk);
 		}
 	}
 
+	if (text != String()) { // Text left after the last marker (or the whole source when it contains no markers).
+		StageTemplate::Chunk text_chunk;
+		text_chunk.type = StageTemplate::Chunk::TYPE_TEXT;
+		text_chunk.text = text.utf8();
+		stage_templates[p_stage_type].chunks.push_back(text_chunk);
+		text = String();
+	}
+}
+
+void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name) { // Stores the shader name and builds the per-stage chunk templates from the given sources via _add_stage().
+	name = p_name;
 	if (p_compute_code) {
+		_add_stage(p_compute_code, STAGE_TYPE_COMPUTE); // A non-null compute source makes this a compute shader; vertex/fragment sources are not parsed in that case.
 		is_compute = true;
-
-		String defines_tag = "\nVERSION_DEFINES";
-		String globals_tag = "\nCOMPUTE_SHADER_GLOBALS";
-		String material_tag = "\nMATERIAL_UNIFORMS";
-		String code_tag = "\nCOMPUTE_SHADER_CODE";
-		String code = p_compute_code;
-
-		int cpos = code.find(defines_tag);
-		if (cpos != -1) {
-			compute_codev = code.substr(0, cpos).ascii();
-			code = code.substr(cpos + defines_tag.length(), code.length());
+	} else {
+		is_compute = false;
+		if (p_vertex_code) { // Each raster stage is optional; a null source leaves that stage's template empty.
+			_add_stage(p_vertex_code, STAGE_TYPE_VERTEX);
 		}
-
-		cpos = code.find(material_tag);
-
-		if (cpos == -1) {
-			compute_code0 = code.ascii();
-		} else {
-			compute_code0 = code.substr(0, cpos).ascii();
-			code = code.substr(cpos + material_tag.length(), code.length());
-
-			cpos = code.find(globals_tag);
-
-			if (cpos == -1) {
-				compute_code1 = code.ascii();
-			} else {
-				compute_code1 = code.substr(0, cpos).ascii();
-				String code2 = code.substr(cpos + globals_tag.length(), code.length());
-
-				cpos = code2.find(code_tag);
-				if (cpos == -1) {
-					compute_code2 = code2.ascii();
-				} else {
-					compute_code2 = code2.substr(0, cpos).ascii();
-					compute_code3 = code2.substr(cpos + code_tag.length(), code2.length()).ascii();
-				}
-			}
+		if (p_fragment_code) {
+			_add_stage(p_fragment_code, STAGE_TYPE_FRAGMENT);
 		}
 	}
 }
@@ -198,6 +135,49 @@ void ShaderRD::_clear_version(Version *p_version) {
 	}
 }
 
+void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, const Version *p_version, const StageTemplate &p_template) { // Appends the source of one stage for variant p_variant to builder by expanding the template's chunks in order.
+	for (uint32_t i = 0; i < p_template.chunks.size(); i++) {
+		const StageTemplate::Chunk &chunk = p_template.chunks[i];
+		switch (chunk.type) {
+			case StageTemplate::Chunk::TYPE_VERSION_DEFINES: { // Emits general, per-variant and per-version custom defines, then the *_USED feature defines.
+				builder.append("\n"); //make sure defines begin at newline
+				builder.append(general_defines.get_data()); // NOTE(review): the compute path this replaces appended base_compute_defines before general_defines; it is not appended here - confirm intended.
+				builder.append(variant_defines[p_variant].get_data());
+				for (int j = 0; j < p_version->custom_defines.size(); j++) {
+					builder.append(p_version->custom_defines[j].get_data());
+				}
+				builder.append("\n"); //make sure the defines above are terminated by a newline
+				if (p_version->uniforms.size()) { // Defined only when the version's uniforms string has non-zero size.
+					builder.append("#define MATERIAL_UNIFORMS_USED\n");
+				}
+				for (Map<StringName, CharString>::Element *E = p_version->code_sections.front(); E; E = E->next()) { // One "<KEY>_CODE_USED" define per code section stored on the version.
+					builder.append(String("#define ") + String(E->key()) + "_CODE_USED\n");
+				}
+			} break;
+			case StageTemplate::Chunk::TYPE_MATERIAL_UNIFORMS: {
+				builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
+			} break;
+			case StageTemplate::Chunk::TYPE_VERTEX_GLOBALS: {
+				builder.append(p_version->vertex_globals.get_data()); // vertex globals
+			} break;
+			case StageTemplate::Chunk::TYPE_FRAGMENT_GLOBALS: {
+				builder.append(p_version->fragment_globals.get_data()); // fragment globals
+			} break;
+			case StageTemplate::Chunk::TYPE_COMPUTE_GLOBALS: {
+				builder.append(p_version->compute_globals.get_data()); // compute globals
+			} break;
+			case StageTemplate::Chunk::TYPE_CODE: {
+				if (p_version->code_sections.has(chunk.code)) { // A section the version does not provide expands to nothing.
+					builder.append(p_version->code_sections[chunk.code].get_data());
+				}
+			} break;
+			case StageTemplate::Chunk::TYPE_TEXT: {
+				builder.append(chunk.text.get_data()); // Literal template text, copied through unchanged.
+			} break;
+		}
+	}
+}
+
 void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
 	if (!variants_enabled[p_variant]) {
 		return; //variant is disabled, return
@@ -214,29 +194,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
 		//vertex stage
 
 		StringBuilder builder;
-
-		builder.append(vertex_codev.get_data()); // version info (if exists)
-		builder.append("\n"); //make sure defines begin at newline
-		builder.append(general_defines.get_data());
-		builder.append(variant_defines[p_variant].get_data());
-
-		for (int j = 0; j < p_version->custom_defines.size(); j++) {
-			builder.append(p_version->custom_defines[j].get_data());
-		}
-
-		builder.append(vertex_code0.get_data()); //first part of vertex
-
-		builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
-
-		builder.append(vertex_code1.get_data()); //second part of vertex
-
-		builder.append(p_version->vertex_globals.get_data()); // vertex globals
-
-		builder.append(vertex_code2.get_data()); //third part of vertex
-
-		builder.append(p_version->vertex_code.get_data()); // code
-
-		builder.append(vertex_code3.get_data()); //fourth of vertex
+		_build_variant_code(builder, p_variant, p_version, stage_templates[STAGE_TYPE_VERTEX]);
 
 		current_source = builder.as_string();
 		RD::ShaderStageData stage;
@@ -254,33 +212,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
 		current_stage = RD::SHADER_STAGE_FRAGMENT;
 
 		StringBuilder builder;
-
-		builder.append(fragment_codev.get_data()); // version info (if exists)
-		builder.append("\n"); //make sure defines begin at newline
-
-		builder.append(general_defines.get_data());
-		builder.append(variant_defines[p_variant].get_data());
-		for (int j = 0; j < p_version->custom_defines.size(); j++) {
-			builder.append(p_version->custom_defines[j].get_data());
-		}
-
-		builder.append(fragment_code0.get_data()); //first part of fragment
-
-		builder.append(p_version->uniforms.get_data()); //uniforms (same for fragment and fragment)
-
-		builder.append(fragment_code1.get_data()); //first part of fragment
-
-		builder.append(p_version->fragment_globals.get_data()); // fragment globals
-
-		builder.append(fragment_code2.get_data()); //third part of fragment
-
-		builder.append(p_version->fragment_light.get_data()); // fragment light
-
-		builder.append(fragment_code3.get_data()); //fourth part of fragment
-
-		builder.append(p_version->fragment_code.get_data()); // fragment code
-
-		builder.append(fragment_code4.get_data()); //fourth part of fragment
+		_build_variant_code(builder, p_variant, p_version, stage_templates[STAGE_TYPE_FRAGMENT]);
 
 		current_source = builder.as_string();
 		RD::ShaderStageData stage;
@@ -298,32 +230,10 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
 		current_stage = RD::SHADER_STAGE_COMPUTE;
 
 		StringBuilder builder;
-
-		builder.append(compute_codev.get_data()); // version info (if exists)
-		builder.append("\n"); //make sure defines begin at newline
-		builder.append(base_compute_defines.get_data());
-		builder.append(general_defines.get_data());
-		builder.append(variant_defines[p_variant].get_data());
-
-		for (int j = 0; j < p_version->custom_defines.size(); j++) {
-			builder.append(p_version->custom_defines[j].get_data());
-		}
-
-		builder.append(compute_code0.get_data()); //first part of compute
-
-		builder.append(p_version->uniforms.get_data()); //uniforms (same for compute and fragment)
-
-		builder.append(compute_code1.get_data()); //second part of compute
-
-		builder.append(p_version->compute_globals.get_data()); // compute globals
-
-		builder.append(compute_code2.get_data()); //third part of compute
-
-		builder.append(p_version->compute_code.get_data()); // code
-
-		builder.append(compute_code3.get_data()); //fourth of compute
+		_build_variant_code(builder, p_variant, p_version, stage_templates[STAGE_TYPE_COMPUTE]);
 
 		current_source = builder.as_string();
+
 		RD::ShaderStageData stage;
 		stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_COMPUTE, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
 		if (stage.spir_v.size() == 0) {
@@ -364,29 +274,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio
 			//vertex stage
 
 			StringBuilder builder;
-
-			builder.append(vertex_codev.get_data()); // version info (if exists)
-			builder.append("\n"); //make sure defines begin at newline
-			builder.append(general_defines.get_data());
-			builder.append(variant_defines[i].get_data());
-
-			for (int j = 0; j < version->custom_defines.size(); j++) {
-				builder.append(version->custom_defines[j].get_data());
-			}
-
-			builder.append(vertex_code0.get_data()); //first part of vertex
-
-			builder.append(version->uniforms.get_data()); //uniforms (same for vertex and fragment)
-
-			builder.append(vertex_code1.get_data()); //second part of vertex
-
-			builder.append(version->vertex_globals.get_data()); // vertex globals
-
-			builder.append(vertex_code2.get_data()); //third part of vertex
-
-			builder.append(version->vertex_code.get_data()); // code
-
-			builder.append(vertex_code3.get_data()); //fourth of vertex
+			_build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_VERTEX]);
 
 			RS::ShaderNativeSourceCode::Version::Stage stage;
 			stage.name = "vertex";
@@ -399,32 +287,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio
 			//fragment stage
 
 			StringBuilder builder;
-
-			builder.append(fragment_codev.get_data()); // version info (if exists)
-			builder.append("\n"); //make sure defines begin at newline
-			builder.append(general_defines.get_data());
-			builder.append(variant_defines[i].get_data());
-			for (int j = 0; j < version->custom_defines.size(); j++) {
-				builder.append(version->custom_defines[j].get_data());
-			}
-
-			builder.append(fragment_code0.get_data()); //first part of fragment
-
-			builder.append(version->uniforms.get_data()); //uniforms (same for fragment and fragment)
-
-			builder.append(fragment_code1.get_data()); //first part of fragment
-
-			builder.append(version->fragment_globals.get_data()); // fragment globals
-
-			builder.append(fragment_code2.get_data()); //third part of fragment
-
-			builder.append(version->fragment_light.get_data()); // fragment light
-
-			builder.append(fragment_code3.get_data()); //fourth part of fragment
-
-			builder.append(version->fragment_code.get_data()); // fragment code
-
-			builder.append(fragment_code4.get_data()); //fourth part of fragment
+			_build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_FRAGMENT]);
 
 			RS::ShaderNativeSourceCode::Version::Stage stage;
 			stage.name = "fragment";
@@ -437,30 +300,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio
 			//compute stage
 
 			StringBuilder builder;
-
-			builder.append(compute_codev.get_data()); // version info (if exists)
-			builder.append("\n"); //make sure defines begin at newline
-			builder.append(base_compute_defines.get_data());
-			builder.append(general_defines.get_data());
-			builder.append(variant_defines[i].get_data());
-
-			for (int j = 0; j < version->custom_defines.size(); j++) {
-				builder.append(version->custom_defines[j].get_data());
-			}
-
-			builder.append(compute_code0.get_data()); //first part of compute
-
-			builder.append(version->uniforms.get_data()); //uniforms (same for compute and fragment)
-
-			builder.append(compute_code1.get_data()); //second part of compute
-
-			builder.append(version->compute_globals.get_data()); // compute globals
-
-			builder.append(compute_code2.get_data()); //third part of compute
-
-			builder.append(version->compute_code.get_data()); // code
-
-			builder.append(compute_code3.get_data()); //fourth of compute
+			_build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_COMPUTE]);
 
 			RS::ShaderNativeSourceCode::Version::Stage stage;
 			stage.name = "compute";
@@ -518,17 +358,18 @@ void ShaderRD::_compile_version(Version *p_version) {
 	p_version->valid = true;
 }
 
-void ShaderRD::version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines) {
+void ShaderRD::version_set_code(RID p_version, const Map<String, String> &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector<String> &p_custom_defines) {
 	ERR_FAIL_COND(is_compute);
 
 	Version *version = version_owner.getornull(p_version);
 	ERR_FAIL_COND(!version);
 	version->vertex_globals = p_vertex_globals.utf8();
-	version->vertex_code = p_vertex_code.utf8();
-	version->fragment_light = p_fragment_light.utf8();
 	version->fragment_globals = p_fragment_globals.utf8();
-	version->fragment_code = p_fragment_code.utf8();
 	version->uniforms = p_uniforms.utf8();
+	version->code_sections.clear();
+	for (Map<String, String>::Element *E = p_code.front(); E; E = E->next()) {
+		version->code_sections[StringName(E->key().to_upper())] = E->get().utf8();
+	}
 
 	version->custom_defines.clear();
 	for (int i = 0; i < p_custom_defines.size(); i++) {
@@ -542,15 +383,20 @@ void ShaderRD::version_set_code(RID p_version, const String &p_uniforms, const S
 	}
 }
 
-void ShaderRD::version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines) {
+void ShaderRD::version_set_compute_code(RID p_version, const Map<String, String> &p_code, const String &p_uniforms, const String &p_compute_globals, const Vector<String> &p_custom_defines) {
 	ERR_FAIL_COND(!is_compute);
 
 	Version *version = version_owner.getornull(p_version);
 	ERR_FAIL_COND(!version);
+
 	version->compute_globals = p_compute_globals.utf8();
-	version->compute_code = p_compute_code.utf8();
 	version->uniforms = p_uniforms.utf8();
 
+	version->code_sections.clear();
+	for (Map<String, String>::Element *E = p_code.front(); E; E = E->next()) {
+		version->code_sections[StringName(E->key().to_upper())] = E->get().utf8();
+	}
+
 	version->custom_defines.clear();
 	for (int i = 0; i < p_custom_defines.size(); i++) {
 		version->custom_defines.push_back(p_custom_defines[i].utf8());
diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h
index e0f4dcf2d0..f20d539621 100644
--- a/servers/rendering/renderer_rd/shader_rd.h
+++ b/servers/rendering/renderer_rd/shader_rd.h
@@ -32,7 +32,9 @@
 #define SHADER_RD_H
 
 #include "core/os/mutex.h"
+#include "core/string/string_builder.h"
 #include "core/templates/hash_map.h"
+#include "core/templates/local_vector.h"
 #include "core/templates/map.h"
 #include "core/templates/rid_owner.h"
 #include "core/variant/variant.h"
@@ -52,12 +54,9 @@ class ShaderRD {
 	struct Version {
 		CharString uniforms;
 		CharString vertex_globals;
-		CharString vertex_code;
 		CharString compute_globals;
-		CharString compute_code;
-		CharString fragment_light;
 		CharString fragment_globals;
-		CharString fragment_code;
+		Map<StringName, CharString> code_sections;
 		Vector<CharString> custom_defines;
 
 		RID *variants; //same size as version defines
@@ -76,31 +75,44 @@ class ShaderRD {
 
 	RID_Owner<Version> version_owner;
 
-	CharString fragment_codev; //for version and extensions
-	CharString fragment_code0;
-	CharString fragment_code1;
-	CharString fragment_code2;
-	CharString fragment_code3;
-	CharString fragment_code4;
-
-	CharString vertex_codev; //for version and extensions
-	CharString vertex_code0;
-	CharString vertex_code1;
-	CharString vertex_code2;
-	CharString vertex_code3;
+	struct StageTemplate { // One shader stage's source, held as an ordered list of chunks instead of fixed code0..codeN strings.
+		struct Chunk { // A single piece of the stage source: either literal text or a slot filled in per version/variant.
+			enum Type {
+				TYPE_VERSION_DEFINES, // Slot for the define block (presumably general, variant and custom defines; matches the `#VERSION_DEFINES` marker in the .glsl files).
+				TYPE_MATERIAL_UNIFORMS, // Slot for the material uniforms (presumably Version::uniforms; `#MATERIAL_UNIFORMS` marker).
+				TYPE_VERTEX_GLOBALS, // Slot for Version::vertex_globals (presumably `#GLOBALS` in a vertex stage; confirm in _add_stage).
+				TYPE_FRAGMENT_GLOBALS, // Slot for Version::fragment_globals (presumably `#GLOBALS` in a fragment stage; confirm in _add_stage).
+				TYPE_COMPUTE_GLOBALS, // Slot for Version::compute_globals (presumably `#GLOBALS` in a compute stage; confirm in _add_stage).
+				TYPE_CODE, // Slot for a named user code section (`#CODE : NAME` marker); the name is stored in `code`.
+				TYPE_TEXT // Literal template text, stored in `text`.
+			};
+
+			Type type; // Selects which of the fields below (if any) is meaningful for this chunk.
+			StringName code; // Section name for TYPE_CODE chunks; presumably the key looked up in Version::code_sections.
+			CharString text; // Verbatim source for TYPE_TEXT chunks.
+		};
+		LocalVector<Chunk> chunks; // Chunks in the order they appear in the stage source.
+	};
 
 	bool is_compute = false;
 
-	CharString compute_codev; //for version and extensions
-	CharString compute_code0;
-	CharString compute_code1;
-	CharString compute_code2;
-	CharString compute_code3;
-
 	const char *name;
 
 	CharString base_compute_defines;
 
+	enum StageType { // Index into stage_templates[]; one entry per supported shader stage.
+		STAGE_TYPE_VERTEX, // Used with stage_templates[] when building the "vertex" stage source.
+		STAGE_TYPE_FRAGMENT, // Used with stage_templates[] when building the "fragment" stage source.
+		STAGE_TYPE_COMPUTE, // Used with stage_templates[] when building the "compute" stage source.
+		STAGE_TYPE_MAX, // Number of stage types; sizes the stage_templates array.
+	};
+
+	StageTemplate stage_templates[STAGE_TYPE_MAX]; // One parsed template per StageType value.
+
+	void _build_variant_code(StringBuilder &p_builder, uint32_t p_variant, const Version *p_version, const StageTemplate &p_template); // Appends the source of variant p_variant of p_version, assembled from p_template, to p_builder.
+
+	void _add_stage(const char *p_code, StageType p_stage_type); // NOTE(review): presumably parses p_code into stage_templates[p_stage_type]; definition not visible here, confirm in shader_rd.cpp.
+
 protected:
 	ShaderRD();
 	void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name);
@@ -108,8 +120,8 @@ protected:
 public:
 	RID version_create();
 
-	void version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines);
-	void version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines);
+	void version_set_code(RID p_version, const Map<String, String> &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector<String> &p_custom_defines);
+	void version_set_compute_code(RID p_version, const Map<String, String> &p_code, const String &p_uniforms, const String &p_compute_globals, const Vector<String> &p_custom_defines);
 
 	_FORCE_INLINE_ RID version_get_shader(RID p_version, int p_variant) {
 		ERR_FAIL_INDEX_V(p_variant, variant_defines.size(), RID());
diff --git a/servers/rendering/renderer_rd/shaders/bokeh_dof.glsl b/servers/rendering/renderer_rd/shaders/bokeh_dof.glsl
index 63f086a83d..b70e0b6bd5 100644
--- a/servers/rendering/renderer_rd/shaders/bokeh_dof.glsl
+++ b/servers/rendering/renderer_rd/shaders/bokeh_dof.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #define BLOCK_SIZE 8
 
diff --git a/servers/rendering/renderer_rd/shaders/canvas.glsl b/servers/rendering/renderer_rd/shaders/canvas.glsl
index 3b39edc70e..8b97ec119f 100644
--- a/servers/rendering/renderer_rd/shaders/canvas.glsl
+++ b/servers/rendering/renderer_rd/shaders/canvas.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #ifdef USE_ATTRIBUTES
 layout(location = 0) in vec2 vertex_attrib;
@@ -26,17 +26,15 @@ layout(location = 3) out vec2 pixel_size_interp;
 
 #endif
 
-#ifdef USE_MATERIAL_UNIFORMS
+#ifdef MATERIAL_UNIFORMS_USED
 layout(set = 1, binding = 0, std140) uniform MaterialUniforms{
-	/* clang-format off */
-MATERIAL_UNIFORMS
-	/* clang-format on */
+
+#MATERIAL_UNIFORMS
+
 } material;
 #endif
 
-/* clang-format off */
-VERTEX_SHADER_GLOBALS
-/* clang-format on */
+#GLOBALS
 
 void main() {
 	vec4 instance_custom = vec4(0.0);
@@ -132,9 +130,7 @@ void main() {
 	float point_size = 1.0;
 #endif
 	{
-		/* clang-format off */
-VERTEX_SHADER_CODE
-		/* clang-format on */
+#CODE : VERTEX
 	}
 
 #ifdef USE_NINEPATCH
@@ -212,7 +208,7 @@ VERTEX_SHADER_CODE
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #include "canvas_uniforms_inc.glsl"
 
@@ -228,11 +224,11 @@ layout(location = 3) in vec2 pixel_size_interp;
 
 layout(location = 0) out vec4 frag_color;
 
-#ifdef USE_MATERIAL_UNIFORMS
+#ifdef MATERIAL_UNIFORMS_USED
 layout(set = 1, binding = 0, std140) uniform MaterialUniforms{
-	/* clang-format off */
-MATERIAL_UNIFORMS
-	/* clang-format on */
+
+#MATERIAL_UNIFORMS
+
 } material;
 #endif
 
@@ -260,11 +256,9 @@ vec2 sdf_to_screen_uv(vec2 p_sdf) {
 	return p_sdf * canvas_data.sdf_to_screen;
 }
 
-/* clang-format off */
-FRAGMENT_SHADER_GLOBALS
-/* clang-format on */
+#GLOBALS
 
-#ifdef LIGHT_SHADER_CODE_USED
+#ifdef LIGHT_CODE_USED
 
 vec4 light_compute(
 		vec3 light_vertex,
@@ -278,9 +272,9 @@ vec4 light_compute(
 		vec2 uv,
 		vec4 color, bool is_directional) {
 	vec4 light = vec4(0.0);
-	/* clang-format off */
-LIGHT_SHADER_CODE
-	/* clang-format on */
+
+#CODE : LIGHT
+
 	return light;
 }
 
@@ -356,7 +350,7 @@ vec3 light_normal_compute(vec3 light_vec, vec3 normal, vec3 base_color, vec3 lig
 
 //float distance = length(shadow_pos);
 vec4 light_shadow_compute(uint light_base, vec4 light_color, vec4 shadow_uv
-#ifdef LIGHT_SHADER_CODE_USED
+#ifdef LIGHT_CODE_USED
 		,
 		vec3 shadow_modulate
 #endif
@@ -395,7 +389,7 @@ vec4 light_shadow_compute(uint light_base, vec4 light_color, vec4 shadow_uv
 	}
 
 	vec4 shadow_color = unpackUnorm4x8(light_array.data[light_base].shadow_color);
-#ifdef LIGHT_SHADER_CODE_USED
+#ifdef LIGHT_CODE_USED
 	shadow_color.rgb *= shadow_modulate;
 #endif
 
@@ -504,11 +498,7 @@ void main() {
 		normal_used = true;
 #endif
 
-		/* clang-format off */
-
-FRAGMENT_SHADER_CODE
-
-		/* clang-format on */
+#CODE : FRAGMENT
 
 #if defined(NORMAL_MAP_USED)
 		normal = mix(vec3(0.0, 0.0, 1.0), normal_map * vec3(2.0, -2.0, 1.0) - vec3(1.0, -1.0, 0.0), normal_map_depth);
@@ -543,7 +533,7 @@ FRAGMENT_SHADER_CODE
 		vec2 direction = light_array.data[light_base].position;
 		vec4 light_color = light_array.data[light_base].color;
 
-#ifdef LIGHT_SHADER_CODE_USED
+#ifdef LIGHT_CODE_USED
 
 		vec4 shadow_modulate = vec4(1.0);
 		light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, true);
@@ -561,7 +551,7 @@ FRAGMENT_SHADER_CODE
 			vec4 shadow_uv = vec4(shadow_pos.x, light_array.data[light_base].shadow_y_ofs, shadow_pos.y * light_array.data[light_base].shadow_zfar_inv, 1.0);
 
 			light_color = light_shadow_compute(light_base, light_color, shadow_uv
-#ifdef LIGHT_SHADER_CODE_USED
+#ifdef LIGHT_CODE_USED
 					,
 					shadow_modulate.rgb
 #endif
@@ -599,7 +589,7 @@ FRAGMENT_SHADER_CODE
 		vec4 light_color = textureLod(sampler2D(atlas_texture, texture_sampler), tex_uv_atlas, 0.0);
 		vec4 light_base_color = light_array.data[light_base].color;
 
-#ifdef LIGHT_SHADER_CODE_USED
+#ifdef LIGHT_CODE_USED
 
 		vec4 shadow_modulate = vec4(1.0);
 		vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height);
@@ -657,7 +647,7 @@ FRAGMENT_SHADER_CODE
 			vec4 shadow_uv = vec4(tex_ofs, light_array.data[light_base].shadow_y_ofs, distance, 1.0);
 
 			light_color = light_shadow_compute(light_base, light_color, shadow_uv
-#ifdef LIGHT_SHADER_CODE_USED
+#ifdef LIGHT_CODE_USED
 					,
 					shadow_modulate.rgb
 #endif
diff --git a/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl b/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl
index 5c25235c58..9f89f4b3b7 100644
--- a/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl
+++ b/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) in highp vec3 vertex;
 
@@ -32,7 +32,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(push_constant, binding = 0, std430) uniform Constants {
 	mat4 projection;
diff --git a/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl b/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl
index 302ad03b41..65a554e839 100644
--- a/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl
+++ b/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl
index 70a875192c..40da2c6e5c 100644
--- a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl
+++ b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl
index ca92d2104e..da7d189281 100644
--- a/servers/rendering/renderer_rd/shaders/cluster_render.glsl
+++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) in vec3 vertex_attrib;
 
@@ -63,7 +63,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic) && defined(has_GL_KHR_shader_subgroup_vote)
 
diff --git a/servers/rendering/renderer_rd/shaders/cluster_store.glsl b/servers/rendering/renderer_rd/shaders/cluster_store.glsl
index 5be0893c4f..b0606efa94 100644
--- a/servers/rendering/renderer_rd/shaders/cluster_store.glsl
+++ b/servers/rendering/renderer_rd/shaders/cluster_store.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/copy.glsl b/servers/rendering/renderer_rd/shaders/copy.glsl
index cdd35dfb3f..4110a95ddb 100644
--- a/servers/rendering/renderer_rd/shaders/copy.glsl
+++ b/servers/rendering/renderer_rd/shaders/copy.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/copy_to_fb.glsl b/servers/rendering/renderer_rd/shaders/copy_to_fb.glsl
index 9751e13b4e..8c68e2dc2f 100644
--- a/servers/rendering/renderer_rd/shaders/copy_to_fb.glsl
+++ b/servers/rendering/renderer_rd/shaders/copy_to_fb.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) out vec2 uv_interp;
 
@@ -37,7 +37,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(push_constant, binding = 1, std430) uniform Params {
 	vec4 section;
diff --git a/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl b/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl
index c3ac0bee57..dfbce29119 100644
--- a/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl
+++ b/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(push_constant, binding = 1, std430) uniform Params {
 	float z_far;
@@ -26,7 +26,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) in vec2 uv_interp;
 
diff --git a/servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl b/servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl
index 7f269b7af3..9fa84657d1 100644
--- a/servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl
+++ b/servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl
@@ -22,7 +22,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #define BLOCK_SIZE 8
 
diff --git a/servers/rendering/renderer_rd/shaders/cubemap_filter.glsl b/servers/rendering/renderer_rd/shaders/cubemap_filter.glsl
index 987545fb76..2a774b0eb4 100644
--- a/servers/rendering/renderer_rd/shaders/cubemap_filter.glsl
+++ b/servers/rendering/renderer_rd/shaders/cubemap_filter.glsl
@@ -22,7 +22,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #define GROUP_SIZE 64
 
diff --git a/servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl b/servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl
index 5cbb00baa4..ce7c03c1d4 100644
--- a/servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl
+++ b/servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #define GROUP_SIZE 8
 
diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/gi.glsl
index 92a5682572..bfd5c4c88d 100644
--- a/servers/rendering/renderer_rd/shaders/gi.glsl
+++ b/servers/rendering/renderer_rd/shaders/gi.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/giprobe.glsl b/servers/rendering/renderer_rd/shaders/giprobe.glsl
index b931461b31..49a493cdc7 100644
--- a/servers/rendering/renderer_rd/shaders/giprobe.glsl
+++ b/servers/rendering/renderer_rd/shaders/giprobe.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #ifdef MODE_DYNAMIC
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
diff --git a/servers/rendering/renderer_rd/shaders/giprobe_debug.glsl b/servers/rendering/renderer_rd/shaders/giprobe_debug.glsl
index 515cc35507..7d4d72967a 100644
--- a/servers/rendering/renderer_rd/shaders/giprobe_debug.glsl
+++ b/servers/rendering/renderer_rd/shaders/giprobe_debug.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 struct CellData {
 	uint position; // xyz 10 bits
@@ -172,7 +172,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) in vec4 color_interp;
 layout(location = 0) out vec4 frag_color;
diff --git a/servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl b/servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl
index 5b3dec0ee7..e20b3f680d 100644
--- a/servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl
+++ b/servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/giprobe_write.glsl b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl
index 56b3b7ccb4..5dc2d08a3b 100644
--- a/servers/rendering/renderer_rd/shaders/giprobe_write.glsl
+++ b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl b/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl
index 8a11c35b78..466442b67a 100644
--- a/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl
+++ b/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #define BLOCK_SIZE 8
 
diff --git a/servers/rendering/renderer_rd/shaders/particles.glsl b/servers/rendering/renderer_rd/shaders/particles.glsl
index cb6d8dc7f6..3712220dc4 100644
--- a/servers/rendering/renderer_rd/shaders/particles.glsl
+++ b/servers/rendering/renderer_rd/shaders/particles.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
@@ -146,11 +146,11 @@ layout(set = 2, binding = 1) uniform texture2D height_field_texture;
 
 /* SET 3: MATERIAL */
 
-#ifdef USE_MATERIAL_UNIFORMS
+#ifdef MATERIAL_UNIFORMS_USED
 layout(set = 3, binding = 0, std140) uniform MaterialUniforms{
-	/* clang-format off */
-MATERIAL_UNIFORMS
-	/* clang-format on */
+
+#MATERIAL_UNIFORMS
+
 } material;
 #endif
 
@@ -196,11 +196,7 @@ bool emit_subparticle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom
 	return true;
 }
 
-/* clang-format off */
-
-COMPUTE_SHADER_GLOBALS
-
-/* clang-format on */
+#GLOBALS
 
 void main() {
 	uint particle = gl_GlobalInvocationID.x;
@@ -256,6 +252,115 @@ void main() {
 
 	/* Process physics if active */
 
+	if (params.sub_emitter_mode) {
+		if (!PARTICLE.is_active) {
+			int src_index = atomicAdd(src_particles.particle_count, -1) - 1;
+
+			if (src_index >= 0) {
+				PARTICLE.is_active = true;
+				restart = true;
+
+				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_POSITION)) {
+					PARTICLE.xform[3] = src_particles.data[src_index].xform[3];
+				} else {
+					PARTICLE.xform[3] = vec4(0, 0, 0, 1);
+					restart_position = true;
+				}
+				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_ROTATION_SCALE)) {
+					PARTICLE.xform[0] = src_particles.data[src_index].xform[0];
+					PARTICLE.xform[1] = src_particles.data[src_index].xform[1];
+					PARTICLE.xform[2] = src_particles.data[src_index].xform[2];
+				} else {
+					PARTICLE.xform[0] = vec4(1, 0, 0, 0);
+					PARTICLE.xform[1] = vec4(0, 1, 0, 0);
+					PARTICLE.xform[2] = vec4(0, 0, 1, 0);
+					restart_rotation_scale = true;
+				}
+				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_VELOCITY)) {
+					PARTICLE.velocity = src_particles.data[src_index].velocity;
+				} else {
+					PARTICLE.velocity = vec3(0);
+					restart_velocity = true;
+				}
+				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_COLOR)) {
+					PARTICLE.color = src_particles.data[src_index].color;
+				} else {
+					PARTICLE.color = vec4(1);
+					restart_color = true;
+				}
+
+				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_CUSTOM)) {
+					PARTICLE.custom = src_particles.data[src_index].custom;
+				} else {
+					PARTICLE.custom = vec4(0);
+					restart_custom = true;
+				}
+			}
+		}
+
+	} else if (FRAME.emitting) {
+		float restart_phase = float(index) / float(params.total_particles);
+
+		if (FRAME.randomness > 0.0) {
+			uint seed = FRAME.cycle;
+			if (restart_phase >= FRAME.system_phase) {
+				seed -= uint(1);
+			}
+			seed *= uint(params.total_particles);
+			seed += uint(index);
+			float random = float(hash(seed) % uint(65536)) / 65536.0;
+			restart_phase += FRAME.randomness * random * 1.0 / float(params.total_particles);
+		}
+
+		restart_phase *= (1.0 - FRAME.explosiveness);
+
+		if (FRAME.system_phase > FRAME.prev_system_phase) {
+			// restart_phase >= prev_system_phase is used so particles emit in the first frame they are processed
+
+			if (restart_phase >= FRAME.prev_system_phase && restart_phase < FRAME.system_phase) {
+				restart = true;
+				if (params.use_fractional_delta) {
+					local_delta = (FRAME.system_phase - restart_phase) * params.lifetime;
+				}
+			}
+
+		} else if (FRAME.delta > 0.0) {
+			if (restart_phase >= FRAME.prev_system_phase) {
+				restart = true;
+				if (params.use_fractional_delta) {
+					local_delta = (1.0 - restart_phase + FRAME.system_phase) * params.lifetime;
+				}
+
+			} else if (restart_phase < FRAME.system_phase) {
+				restart = true;
+				if (params.use_fractional_delta) {
+					local_delta = (FRAME.system_phase - restart_phase) * params.lifetime;
+				}
+			}
+		}
+
+		uint current_cycle = FRAME.cycle;
+
+		if (FRAME.system_phase < restart_phase) {
+			current_cycle -= uint(1);
+		}
+
+		uint particle_number = current_cycle * uint(params.total_particles) + particle;
+
+		if (restart) {
+			PARTICLE.is_active = FRAME.emitting;
+			restart_position = true;
+			restart_rotation_scale = true;
+			restart_velocity = true;
+			restart_color = true;
+			restart_custom = true;
+		}
+	}
+
+	if (restart && PARTICLE.is_active) {
+#CODE : START
+	}
+
 	if (PARTICLE.is_active) {
 		for (uint i = 0; i < FRAME.attractor_count; i++) {
 			vec3 dir;
@@ -434,116 +539,7 @@ void main() {
 		}
 	}
 
-	if (params.sub_emitter_mode) {
-		if (!PARTICLE.is_active) {
-			int src_index = atomicAdd(src_particles.particle_count, -1) - 1;
-
-			if (src_index >= 0) {
-				PARTICLE.is_active = true;
-				restart = true;
-
-				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_POSITION)) {
-					PARTICLE.xform[3] = src_particles.data[src_index].xform[3];
-				} else {
-					PARTICLE.xform[3] = vec4(0, 0, 0, 1);
-					restart_position = true;
-				}
-				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_ROTATION_SCALE)) {
-					PARTICLE.xform[0] = src_particles.data[src_index].xform[0];
-					PARTICLE.xform[1] = src_particles.data[src_index].xform[1];
-					PARTICLE.xform[2] = src_particles.data[src_index].xform[2];
-				} else {
-					PARTICLE.xform[0] = vec4(1, 0, 0, 0);
-					PARTICLE.xform[1] = vec4(0, 1, 0, 0);
-					PARTICLE.xform[2] = vec4(0, 0, 1, 0);
-					restart_rotation_scale = true;
-				}
-				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_VELOCITY)) {
-					PARTICLE.velocity = src_particles.data[src_index].velocity;
-				} else {
-					PARTICLE.velocity = vec3(0);
-					restart_velocity = true;
-				}
-				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_COLOR)) {
-					PARTICLE.color = src_particles.data[src_index].color;
-				} else {
-					PARTICLE.color = vec4(1);
-					restart_color = true;
-				}
-
-				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_CUSTOM)) {
-					PARTICLE.custom = src_particles.data[src_index].custom;
-				} else {
-					PARTICLE.custom = vec4(0);
-					restart_custom = true;
-				}
-			}
-		}
-
-	} else if (FRAME.emitting) {
-		float restart_phase = float(index) / float(params.total_particles);
-
-		if (FRAME.randomness > 0.0) {
-			uint seed = FRAME.cycle;
-			if (restart_phase >= FRAME.system_phase) {
-				seed -= uint(1);
-			}
-			seed *= uint(params.total_particles);
-			seed += uint(index);
-			float random = float(hash(seed) % uint(65536)) / 65536.0;
-			restart_phase += FRAME.randomness * random * 1.0 / float(params.total_particles);
-		}
-
-		restart_phase *= (1.0 - FRAME.explosiveness);
-
-		if (FRAME.system_phase > FRAME.prev_system_phase) {
-			// restart_phase >= prev_system_phase is used so particles emit in the first frame they are processed
-
-			if (restart_phase >= FRAME.prev_system_phase && restart_phase < FRAME.system_phase) {
-				restart = true;
-				if (params.use_fractional_delta) {
-					local_delta = (FRAME.system_phase - restart_phase) * params.lifetime;
-				}
-			}
-
-		} else if (FRAME.delta > 0.0) {
-			if (restart_phase >= FRAME.prev_system_phase) {
-				restart = true;
-				if (params.use_fractional_delta) {
-					local_delta = (1.0 - restart_phase + FRAME.system_phase) * params.lifetime;
-				}
-
-			} else if (restart_phase < FRAME.system_phase) {
-				restart = true;
-				if (params.use_fractional_delta) {
-					local_delta = (FRAME.system_phase - restart_phase) * params.lifetime;
-				}
-			}
-		}
-
-		uint current_cycle = FRAME.cycle;
-
-		if (FRAME.system_phase < restart_phase) {
-			current_cycle -= uint(1);
-		}
-
-		uint particle_number = current_cycle * uint(params.total_particles) + particle;
-
-		if (restart) {
-			PARTICLE.is_active = FRAME.emitting;
-			restart_position = true;
-			restart_rotation_scale = true;
-			restart_velocity = true;
-			restart_color = true;
-			restart_custom = true;
-		}
-	}
-
 	if (PARTICLE.is_active) {
-		/* clang-format off */
-
-COMPUTE_SHADER_CODE
-
-		/* clang-format on */
+#CODE : PROCESS
 	}
 }
diff --git a/servers/rendering/renderer_rd/shaders/particles_copy.glsl b/servers/rendering/renderer_rd/shaders/particles_copy.glsl
index 6c782b6045..80adb49619 100644
--- a/servers/rendering/renderer_rd/shaders/particles_copy.glsl
+++ b/servers/rendering/renderer_rd/shaders/particles_copy.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/resolve.glsl b/servers/rendering/renderer_rd/shaders/resolve.glsl
index e83c4ca93b..2286a26485 100644
--- a/servers/rendering/renderer_rd/shaders/resolve.glsl
+++ b/servers/rendering/renderer_rd/shaders/resolve.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl b/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl
index 464895928a..7b964675ca 100644
--- a/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl
+++ b/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl
index 76edec1cb6..fce17c47e8 100644
--- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl
+++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #include "scene_forward_clustered_inc.glsl"
 
@@ -81,11 +81,11 @@ layout(location = 5) out vec3 tangent_interp;
 layout(location = 6) out vec3 binormal_interp;
 #endif
 
-#ifdef USE_MATERIAL_UNIFORMS
+#ifdef MATERIAL_UNIFORMS_USED
 layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms{
-	/* clang-format off */
-MATERIAL_UNIFORMS
-	/* clang-format on */
+
+#MATERIAL_UNIFORMS
+
 } material;
 #endif
 
@@ -99,11 +99,7 @@ layout(location = 8) out float dp_clip;
 
 layout(location = 9) out flat uint instance_index;
 
-/* clang-format off */
-
-VERTEX_SHADER_GLOBALS
-
-/* clang-format on */
+#GLOBALS
 
 void main() {
 	vec4 instance_custom = vec4(0.0);
@@ -230,11 +226,7 @@ void main() {
 	mat3 modelview_normal = mat3(scene_data.inv_camera_matrix) * world_normal_matrix;
 
 	{
-		/* clang-format off */
-
-VERTEX_SHADER_CODE
-
-		/* clang-format on */
+#CODE : VERTEX
 	}
 
 // using local coordinates (default)
@@ -325,7 +317,7 @@ VERTEX_SHADER_CODE
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #include "scene_forward_clustered_inc.glsl"
 
@@ -372,19 +364,15 @@ layout(location = 9) in flat uint instance_index;
 #define LIGHT_TRANSMITTANCE_USED
 #endif
 
-#ifdef USE_MATERIAL_UNIFORMS
+#ifdef MATERIAL_UNIFORMS_USED
 layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms{
-	/* clang-format off */
-MATERIAL_UNIFORMS
-	/* clang-format on */
-} material;
-#endif
 
-/* clang-format off */
+#MATERIAL_UNIFORMS
 
-FRAGMENT_SHADER_GLOBALS
+} material;
+#endif
 
-/* clang-format on */
+#GLOBALS
 
 #ifdef MODE_RENDER_DEPTH
 
@@ -581,18 +569,14 @@ void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation,
 #endif
 		inout vec3 diffuse_light, inout vec3 specular_light) {
 
-#if defined(USE_LIGHT_SHADER_CODE)
+#if defined(LIGHT_CODE_USED)
 	// light is written by the light shader
 
 	vec3 normal = N;
 	vec3 light = L;
 	vec3 view = V;
 
-	/* clang-format off */
-
-LIGHT_SHADER_CODE
-
-	/* clang-format on */
+#CODE : LIGHT
 
 #else
 
@@ -794,7 +778,7 @@ LIGHT_SHADER_CODE
 	alpha = min(alpha, clamp(1.0 - attenuation), 0.0, 1.0));
 #endif
 
-#endif //defined(USE_LIGHT_SHADER_CODE)
+#endif //defined(LIGHT_CODE_USED)
 }
 
 #ifndef USE_NO_SHADOWS
@@ -1925,11 +1909,7 @@ void main() {
 #endif // ALPHA_ANTIALIASING_EDGE_USED
 
 	{
-		/* clang-format off */
-
-FRAGMENT_SHADER_CODE
-
-		/* clang-format on */
+#CODE : FRAGMENT
 	}
 
 #ifdef LIGHT_TRANSMITTANCE_USED
diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl b/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl
index 06dc4b13de..78e0a85341 100644
--- a/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl
+++ b/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl b/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl
index a5afe74cb2..62d1cffb0a 100644
--- a/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl
+++ b/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl b/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl
index 218605a962..7e06516d90 100644
--- a/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl
+++ b/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl
index e4c3f3a84b..8b58796962 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl
index 08da283dad..0eacbc5363 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #define MAX_CASCADES 8
 
@@ -153,7 +153,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) out vec4 frag_color;
 
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl
index dc7238abed..99db35bb34 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl
deleted file mode 100644
index 69d8824d8a..0000000000
--- a/servers/rendering/renderer_rd/shaders/sdfgi_fields.glsl
+++ /dev/null
@@ -1,182 +0,0 @@
-/* clang-format off */
-[compute]
-
-#version 450
-
-VERSION_DEFINES
-
-layout(local_size_x = OCT_RES, local_size_y = OCT_RES, local_size_z = 1) in;
-
-/* clang-format on */
-
-#define MAX_CASCADES 8
-
-layout(rgba16f, set = 0, binding = 1) uniform restrict image2DArray irradiance_texture;
-layout(rg16f, set = 0, binding = 2) uniform restrict image2DArray depth_texture;
-
-layout(rgba32ui, set = 0, binding = 3) uniform restrict uimage2DArray irradiance_history_texture;
-layout(rg32ui, set = 0, binding = 4) uniform restrict uimage2DArray depth_history_texture;
-
-struct CascadeData {
-	vec3 offset; //offset of (0,0,0) in world coordinates
-	float to_cell; // 1/bounds * grid_size
-};
-
-layout(set = 0, binding = 5, std140) uniform Cascades {
-	CascadeData data[MAX_CASCADES];
-}
-cascades;
-
-#define DEPTH_HISTORY_BITS 24
-#define IRRADIANCE_HISTORY_BITS 16
-
-layout(push_constant, binding = 0, std430) uniform Params {
-	vec3 grid_size;
-	uint max_cascades;
-
-	uint probe_axis_size;
-	uint cascade;
-	uint history_size;
-	uint pad0;
-
-	ivec3 scroll; //scroll in probes
-	uint pad1;
-}
-params;
-
-void main() {
-	ivec2 local = ivec2(gl_LocalInvocationID.xy);
-	ivec2 probe = ivec2(gl_WorkGroupID.xy);
-
-	ivec3 probe_cell;
-	probe_cell.x = probe.x % int(params.probe_axis_size);
-	probe_cell.y = probe.y;
-	probe_cell.z = probe.x / int(params.probe_axis_size);
-
-#ifdef MODE_SCROLL_BEGIN
-
-	ivec3 read_cell = probe_cell - params.scroll;
-
-	uint src_layer = (params.history_size + 1) * params.cascade;
-	uint dst_layer = (params.history_size + 1) * params.max_cascades;
-
-	for (uint i = 0; i <= params.history_size; i++) {
-		ivec3 write_pos = ivec3(probe * OCT_RES + local, int(i));
-
-		if (any(lessThan(read_pos, ivec3(0))) || any(greaterThanEqual(read_pos, ivec3(params.probe_axis_size)))) {
-			// nowhere to read from for scrolling, try finding the value from upper probes
-
-#ifdef MODE_IRRADIANCE
-			imageStore(irradiance_history_texture, write_pos, uvec4(0));
-#endif
-#ifdef MODE_DEPTH
-			imageStore(depth_history_texture, write_pos, uvec4(0));
-#endif
-		} else {
-			ivec3 read_pos;
-			read_pos.xy = read_cell.xy;
-			read_pos.x += read_cell.z * params.probe_axis_size;
-			read_pos.xy = read_pos.xy * OCT_RES + local;
-			read_pos.z = int(i);
-
-#ifdef MODE_IRRADIANCE
-			uvec4 value = imageLoad(irradiance_history_texture, read_pos);
-			imageStore(irradiance_history_texture, write_pos, value);
-#endif
-#ifdef MODE_DEPTH
-			uvec2 value = imageLoad(depth_history_texture, read_pos);
-			imageStore(depth_history_texture, write_pos, value);
-#endif
-		}
-	}
-
-#endif // MODE_SCROLL_BEGIN
-
-#ifdef MODE_SCROLL_END
-
-	uint src_layer = (params.history_size + 1) * params.max_cascades;
-	uint dst_layer = (params.history_size + 1) * params.cascade;
-
-	for (uint i = 0; i <= params.history_size; i++) {
-		ivec3 pos = ivec3(probe * OCT_RES + local, int(i));
-
-#ifdef MODE_IRRADIANCE
-		uvec4 value = imageLoad(irradiance_history_texture, read_pos);
-		imageStore(irradiance_history_texture, write_pos, value);
-#endif
-#ifdef MODE_DEPTH
-		uvec2 value = imageLoad(depth_history_texture, read_pos);
-		imageStore(depth_history_texture, write_pos, value);
-#endif
-	}
-
-#endif //MODE_SCROLL_END
-
-#ifdef MODE_STORE
-
-	uint src_layer = (params.history_size + 1) * params.cascade + params.history_size;
-	ivec3 read_pos = ivec3(probe * OCT_RES + local, int(src_layer));
-
-	ivec3 write_pos = ivec3(probe * (OCT_RES + 2) + ivec2(1), int(params.cascade));
-
-	ivec3 copy_to[4] = ivec3[](write_pos, ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2));
-
-#ifdef MODE_IRRADIANCE
-	uvec4 average = imageLoad(irradiance_history_texture, read_pos);
-	vec4 light_accum = vec4(average / params.history_size) / float(1 << IRRADIANCE_HISTORY_BITS);
-
-#endif
-#ifdef MODE_DEPTH
-	uvec2 value = imageLoad(depth_history_texture, read_pos);
-	vec2 depth_accum = vec4(average / params.history_size) / float(1 << IRRADIANCE_HISTORY_BITS);
-
-	float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell;
-	float max_depth = length(params.grid_size / cascades.data[params.max_cascades - 1].to_cell);
-	max_depth /= probe_cell_size;
-
-	depth_value = (vec2(average / params.history_size) / float(1 << DEPTH_HISTORY_BITS)) * vec2(max_depth, max_depth * max_depth);
-
-#endif
-
-	/* Fill the border if required */
-
-	if (local == ivec2(0, 0)) {
-		copy_to[1] = texture_pos + ivec3(OCT_RES - 1, -1, 0);
-		copy_to[2] = texture_pos + ivec3(-1, OCT_RES - 1, 0);
-		copy_to[3] = texture_pos + ivec3(OCT_RES, OCT_RES, 0);
-	} else if (local == ivec2(OCT_RES - 1, 0)) {
-		copy_to[1] = texture_pos + ivec3(0, -1, 0);
-		copy_to[2] = texture_pos + ivec3(OCT_RES, OCT_RES - 1, 0);
-		copy_to[3] = texture_pos + ivec3(-1, OCT_RES, 0);
-	} else if (local == ivec2(0, OCT_RES - 1)) {
-		copy_to[1] = texture_pos + ivec3(-1, 0, 0);
-		copy_to[2] = texture_pos + ivec3(OCT_RES - 1, OCT_RES, 0);
-		copy_to[3] = texture_pos + ivec3(OCT_RES, -1, 0);
-	} else if (local == ivec2(OCT_RES - 1, OCT_RES - 1)) {
-		copy_to[1] = texture_pos + ivec3(0, OCT_RES, 0);
-		copy_to[2] = texture_pos + ivec3(OCT_RES, 0, 0);
-		copy_to[3] = texture_pos + ivec3(-1, -1, 0);
-	} else if (local.y == 0) {
-		copy_to[1] = texture_pos + ivec3(OCT_RES - local.x - 1, local.y - 1, 0);
-	} else if (local.x == 0) {
-		copy_to[1] = texture_pos + ivec3(local.x - 1, OCT_RES - local.y - 1, 0);
-	} else if (local.y == OCT_RES - 1) {
-		copy_to[1] = texture_pos + ivec3(OCT_RES - local.x - 1, local.y + 1, 0);
-	} else if (local.x == OCT_RES - 1) {
-		copy_to[1] = texture_pos + ivec3(local.x + 1, OCT_RES - local.y - 1, 0);
-	}
-
-	for (int i = 0; i < 4; i++) {
-		if (copy_to[i] == ivec3(-2, -2, -2)) {
-			continue;
-		}
-#ifdef MODE_IRRADIANCE
-		imageStore(irradiance_texture, copy_to[i], light_accum);
-#endif
-#ifdef MODE_DEPTH
-		imageStore(depth_texture, copy_to[i], vec4(depth_value, 0.0, 0.0));
-#endif
-	}
-
-#endif // MODE_STORE
-}
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl
index 007e4c113a..bc376e9522 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl
index 916c60ac89..aa4ded146f 100644
--- a/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl
+++ b/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #ifdef MODE_JUMPFLOOD_OPTIMIZED
 #define GROUP_SIZE 8
diff --git a/servers/rendering/renderer_rd/shaders/skeleton.glsl b/servers/rendering/renderer_rd/shaders/skeleton.glsl
index 680d1045cd..669ffc961d 100644
--- a/servers/rendering/renderer_rd/shaders/skeleton.glsl
+++ b/servers/rendering/renderer_rd/shaders/skeleton.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/sky.glsl b/servers/rendering/renderer_rd/shaders/sky.glsl
index 6c985e1f5c..9924da37d5 100644
--- a/servers/rendering/renderer_rd/shaders/sky.glsl
+++ b/servers/rendering/renderer_rd/shaders/sky.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) out vec2 uv_interp;
 
@@ -24,7 +24,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #define M_PI 3.14159265359
 
@@ -88,13 +88,9 @@ layout(set = 0, binding = 3, std140) uniform DirectionalLights {
 
 directional_lights;
 
-#ifdef USE_MATERIAL_UNIFORMS
+#ifdef MATERIAL_UNIFORMS_USED
 layout(set = 1, binding = 0, std140) uniform MaterialUniforms{
-	/* clang-format off */
-
-MATERIAL_UNIFORMS
-
-	/* clang-format on */
+#MATERIAL_UNIFORMS
 } material;
 #endif
 
@@ -127,11 +123,7 @@ layout(set = 3, binding = 0) uniform texture3D volumetric_fog_texture;
 #define AT_QUARTER_RES_PASS false
 #endif
 
-/* clang-format off */
-
-FRAGMENT_SHADER_GLOBALS
-
-/* clang-format on */
+#GLOBALS
 
 layout(location = 0) out vec4 frag_color;
 
@@ -202,22 +194,10 @@ void main() {
 #endif
 #endif
 
-// unused, just here to make our compiler happy, make sure we don't execute any light code the user adds in..
-#ifndef REALLYINCLUDETHIS
-	{
-		/* clang-format off */
-
-LIGHT_SHADER_CODE
-
-		/* clang-format on */
-	}
-#endif
 	{
-		/* clang-format off */
 
-FRAGMENT_SHADER_CODE
+#CODE : SKY
 
-		/* clang-format on */
 	}
 
 	frag_color.rgb = color * params.position_multiplier.w;
diff --git a/servers/rendering/renderer_rd/shaders/sort.glsl b/servers/rendering/renderer_rd/shaders/sort.glsl
index e5ebb9c64b..307e60dc21 100644
--- a/servers/rendering/renderer_rd/shaders/sort.glsl
+++ b/servers/rendering/renderer_rd/shaders/sort.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 // Original version here:
 // https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11/blob/master/gpuparticles11/src/Shaders
diff --git a/servers/rendering/renderer_rd/shaders/specular_merge.glsl b/servers/rendering/renderer_rd/shaders/specular_merge.glsl
index 0b8f406213..3579c35cce 100644
--- a/servers/rendering/renderer_rd/shaders/specular_merge.glsl
+++ b/servers/rendering/renderer_rd/shaders/specular_merge.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) out vec2 uv_interp;
 
@@ -17,7 +17,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) in vec2 uv_interp;
 
diff --git a/servers/rendering/renderer_rd/shaders/ssao.glsl b/servers/rendering/renderer_rd/shaders/ssao.glsl
index 231f8f91ec..6e945edfcd 100644
--- a/servers/rendering/renderer_rd/shaders/ssao.glsl
+++ b/servers/rendering/renderer_rd/shaders/ssao.glsl
@@ -21,7 +21,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 #define SSAO_ADAPTIVE_TAP_BASE_COUNT 5
 
diff --git a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl b/servers/rendering/renderer_rd/shaders/ssao_blur.glsl
index 510a777048..d9cd2b4e85 100644
--- a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl
+++ b/servers/rendering/renderer_rd/shaders/ssao_blur.glsl
@@ -21,7 +21,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl b/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl
index cb2d31f70d..ee0db6a6f0 100644
--- a/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl
+++ b/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl
@@ -21,7 +21,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl b/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl
index 6aa7624261..687fe1e6e2 100644
--- a/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl
+++ b/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl
@@ -21,7 +21,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl b/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl
index 4fdf334aa5..0907423d5d 100644
--- a/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl
+++ b/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl
@@ -20,7 +20,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl b/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl
index 88a953562f..9367b641c2 100644
--- a/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl
+++ b/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
diff --git a/servers/rendering/renderer_rd/shaders/tonemap.glsl b/servers/rendering/renderer_rd/shaders/tonemap.glsl
index 7de91fd541..86b4da6b08 100644
--- a/servers/rendering/renderer_rd/shaders/tonemap.glsl
+++ b/servers/rendering/renderer_rd/shaders/tonemap.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) out vec2 uv_interp;
 
@@ -16,7 +16,7 @@ void main() {
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 layout(location = 0) in vec2 uv_interp;
 
diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl
index ce8a459b24..cace607667 100644
--- a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl
+++ b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl
@@ -2,7 +2,7 @@
 
 #version 450
 
-VERSION_DEFINES
+#VERSION_DEFINES
 
 /* Do not use subgroups here, seems there is not much advantage and causes glitches
 #if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic)
diff --git a/servers/rendering/renderer_scene.h b/servers/rendering/renderer_scene.h
index b546001843..551d4f4240 100644
--- a/servers/rendering/renderer_scene.h
+++ b/servers/rendering/renderer_scene.h
@@ -69,7 +69,7 @@ public:
 	virtual void instance_set_transform(RID p_instance, const Transform &p_transform) = 0;
 	virtual void instance_attach_object_instance_id(RID p_instance, ObjectID p_id) = 0;
 	virtual void instance_set_blend_shape_weight(RID p_instance, int p_shape, float p_weight) = 0;
-	virtual void instance_set_surface_material(RID p_instance, int p_surface, RID p_material) = 0;
+	virtual void instance_set_surface_override_material(RID p_instance, int p_surface, RID p_material) = 0;
 	virtual void instance_set_visible(RID p_instance, bool p_visible) = 0;
 
 	virtual void instance_set_custom_aabb(RID p_instance, AABB p_aabb) = 0;
diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp
index e8155e4025..2c865186b4 100644
--- a/servers/rendering/renderer_scene_cull.cpp
+++ b/servers/rendering/renderer_scene_cull.cpp
@@ -752,7 +752,7 @@ void RendererSceneCull::instance_set_blend_shape_weight(RID p_instance, int p_sh
 	}
 }
 
-void RendererSceneCull::instance_set_surface_material(RID p_instance, int p_surface, RID p_material) {
+void RendererSceneCull::instance_set_surface_override_material(RID p_instance, int p_surface, RID p_material) {
 	Instance *instance = instance_owner.getornull(p_instance);
 	ERR_FAIL_COND(!instance);
 
diff --git a/servers/rendering/renderer_scene_cull.h b/servers/rendering/renderer_scene_cull.h
index 32f4334288..d7d59665ec 100644
--- a/servers/rendering/renderer_scene_cull.h
+++ b/servers/rendering/renderer_scene_cull.h
@@ -840,7 +840,7 @@ public:
 	virtual void instance_set_transform(RID p_instance, const Transform &p_transform);
 	virtual void instance_attach_object_instance_id(RID p_instance, ObjectID p_id);
 	virtual void instance_set_blend_shape_weight(RID p_instance, int p_shape, float p_weight);
-	virtual void instance_set_surface_material(RID p_instance, int p_surface, RID p_material);
+	virtual void instance_set_surface_override_material(RID p_instance, int p_surface, RID p_material);
 	virtual void instance_set_visible(RID p_instance, bool p_visible);
 
 	virtual void instance_set_custom_aabb(RID p_instance, AABB p_aabb);
diff --git a/servers/rendering/rendering_server_default.h b/servers/rendering/rendering_server_default.h
index e82d5cc3f8..683a22fd9a 100644
--- a/servers/rendering/rendering_server_default.h
+++ b/servers/rendering/rendering_server_default.h
@@ -703,7 +703,7 @@ public:
 	FUNC2(instance_set_transform, RID, const Transform &)
 	FUNC2(instance_attach_object_instance_id, RID, ObjectID)
 	FUNC3(instance_set_blend_shape_weight, RID, int, float)
-	FUNC3(instance_set_surface_material, RID, int, RID)
+	FUNC3(instance_set_surface_override_material, RID, int, RID)
 	FUNC2(instance_set_visible, RID, bool)
 
 	FUNC2(instance_set_custom_aabb, RID, AABB)
diff --git a/servers/rendering/shader_language.cpp b/servers/rendering/shader_language.cpp
index 4ae0eda232..f5228f9747 100644
--- a/servers/rendering/shader_language.cpp
+++ b/servers/rendering/shader_language.cpp
@@ -3109,20 +3109,20 @@ bool ShaderLanguage::_validate_varying_assign(ShaderNode::Varying &p_varying, St
 	}
 	switch (p_varying.stage) {
 		case ShaderNode::Varying::STAGE_UNKNOWN: // first assign
-			if (current_function == String("vertex")) {
+			if (current_function == varying_function_names.vertex) {
 				p_varying.stage = ShaderNode::Varying::STAGE_VERTEX;
-			} else if (current_function == String("fragment")) {
+			} else if (current_function == varying_function_names.fragment) {
 				p_varying.stage = ShaderNode::Varying::STAGE_FRAGMENT;
 			}
 			break;
 		case ShaderNode::Varying::STAGE_VERTEX:
-			if (current_function == String("fragment")) {
+			if (current_function == varying_function_names.fragment) {
 				*r_message = RTR("Varyings which assigned in 'vertex' function may not be reassigned in 'fragment' or 'light'.");
 				return false;
 			}
 			break;
 		case ShaderNode::Varying::STAGE_FRAGMENT:
-			if (current_function == String("vertex")) {
+			if (current_function == varying_function_names.vertex) {
 				*r_message = RTR("Varyings which assigned in 'fragment' function may not be reassigned in 'vertex' or 'light'.");
 				return false;
 			}
@@ -3139,25 +3139,25 @@ bool ShaderLanguage::_validate_varying_using(ShaderNode::Varying &p_varying, Str
 			*r_message = RTR("Varying must be assigned before using!");
 			return false;
 		case ShaderNode::Varying::STAGE_VERTEX:
-			if (current_function == String("fragment")) {
+			if (current_function == varying_function_names.fragment) {
 				p_varying.stage = ShaderNode::Varying::STAGE_VERTEX_TO_FRAGMENT;
-			} else if (current_function == String("light")) {
+			} else if (current_function == varying_function_names.light) {
 				p_varying.stage = ShaderNode::Varying::STAGE_VERTEX_TO_LIGHT;
 			}
 			break;
 		case ShaderNode::Varying::STAGE_FRAGMENT:
-			if (current_function == String("light")) {
+			if (current_function == varying_function_names.light) {
 				p_varying.stage = ShaderNode::Varying::STAGE_FRAGMENT_TO_LIGHT;
 			}
 			break;
 		case ShaderNode::Varying::STAGE_VERTEX_TO_FRAGMENT:
-			if (current_function == String("light")) {
+			if (current_function == varying_function_names.light) {
 				*r_message = RTR("Varying must only be used in two different stages, which can be 'vertex' 'fragment' and 'light'");
 				return false;
 			}
 			break;
 		case ShaderNode::Varying::STAGE_VERTEX_TO_LIGHT:
-			if (current_function == String("fragment")) {
+			if (current_function == varying_function_names.fragment) {
 				*r_message = RTR("Varying must only be used in two different stages, which can be 'vertex' 'fragment' and 'light'");
 				return false;
 			}
@@ -5847,7 +5847,7 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun
 			//check return type
 			BlockNode *b = p_block;
 
-			if (b && b->parent_function && (b->parent_function->name == "vertex" || b->parent_function->name == "fragment" || b->parent_function->name == "light")) {
+			if (b && b->parent_function && p_function_info.main_function) {
 				_set_error(vformat("Using 'return' in '%s' processor function results in undefined behavior!", b->parent_function->name));
 				return ERR_PARSE_ERROR;
 			}
@@ -7246,26 +7246,12 @@ Error ShaderLanguage::_parse_shader(const Map<StringName, FunctionInfo> &p_funct
 }
 
 bool ShaderLanguage::has_builtin(const Map<StringName, ShaderLanguage::FunctionInfo> &p_functions, const StringName &p_name) {
-	if (p_functions.has("vertex")) {
-		if (p_functions["vertex"].built_ins.has(p_name)) {
-			return true;
-		}
-	}
-	if (p_functions.has("fragment")) {
-		if (p_functions["fragment"].built_ins.has(p_name)) {
-			return true;
-		}
-	}
-	if (p_functions.has("light")) {
-		if (p_functions["light"].built_ins.has(p_name)) {
-			return true;
-		}
-	}
-	if (p_functions.has("compute")) {
-		if (p_functions["compute"].built_ins.has(p_name)) {
+	for (Map<StringName, ShaderLanguage::FunctionInfo>::Element *E = p_functions.front(); E; E = E->next()) {
+		if (E->get().built_ins.has(p_name)) {
 			return true;
 		}
 	}
+	// Reached only when no entry of p_functions has p_name in its built_ins.
 	return false;
 }
 
@@ -7399,11 +7385,12 @@ String ShaderLanguage::get_shader_type(const String &p_code) {
 	return String();
 }
 
-Error ShaderLanguage::compile(const String &p_code, const Map<StringName, FunctionInfo> &p_functions, const Vector<StringName> &p_render_modes, const Set<String> &p_shader_types, GlobalVariableGetTypeFunc p_global_variable_type_func) {
+Error ShaderLanguage::compile(const String &p_code, const Map<StringName, FunctionInfo> &p_functions, const Vector<StringName> &p_render_modes, const VaryingFunctionNames &p_varying_function_names, const Set<String> &p_shader_types, GlobalVariableGetTypeFunc p_global_variable_type_func) {
 	clear();
 
 	code = p_code;
 	global_var_get_type_func = p_global_variable_type_func;
+	varying_function_names = p_varying_function_names;
 
 	nodes = nullptr;
 
@@ -7416,10 +7403,11 @@ Error ShaderLanguage::compile(const String &p_code, const Map<StringName, Functi
 	return OK;
 }
 
-Error ShaderLanguage::complete(const String &p_code, const Map<StringName, FunctionInfo> &p_functions, const Vector<StringName> &p_render_modes, const Set<String> &p_shader_types, GlobalVariableGetTypeFunc p_global_variable_type_func, List<ScriptCodeCompletionOption> *r_options, String &r_call_hint) {
+Error ShaderLanguage::complete(const String &p_code, const Map<StringName, FunctionInfo> &p_functions, const Vector<StringName> &p_render_modes, const VaryingFunctionNames &p_varying_function_names, const Set<String> &p_shader_types, GlobalVariableGetTypeFunc p_global_variable_type_func, List<ScriptCodeCompletionOption> *r_options, String &r_call_hint) {
 	clear();
 
 	code = p_code;
+	varying_function_names = p_varying_function_names;
 
 	nodes = nullptr;
 	global_var_get_type_func = p_global_variable_type_func;
diff --git a/servers/rendering/shader_language.h b/servers/rendering/shader_language.h
index 14594b039c..03327f9677 100644
--- a/servers/rendering/shader_language.h
+++ b/servers/rendering/shader_language.h
@@ -331,6 +331,17 @@ public:
 		MAX_INSTANCE_UNIFORM_INDICES = 16
 	};
 
+	// Names of the shader functions that varying validation treats as the vertex, fragment and light stages.
+	struct VaryingFunctionNames {
+		StringName fragment;
+		StringName vertex;
+		StringName light;
+		VaryingFunctionNames() :
+				fragment("fragment"),
+				vertex("vertex"),
+				light("light") {}
+	};
+
 	struct Node {
 		Node *next = nullptr;
 
@@ -769,7 +780,8 @@ public:
 		Map<StringName, BuiltInInfo> built_ins;
 		Map<StringName, StageFunctionInfo> stage_functions;
 
-		bool can_discard;
+		bool can_discard = false;
+		bool main_function = false;
 	};
 	static bool has_builtin(const Map<StringName, ShaderLanguage::FunctionInfo> &p_functions, const StringName &p_name);
 
@@ -796,6 +808,8 @@ private:
 	StringName current_function;
 	bool last_const = false;
 
+	VaryingFunctionNames varying_function_names;
+
 	TkPos _get_tkpos() {
 		TkPos tkp;
 		tkp.char_idx = char_idx;
@@ -898,8 +912,8 @@ public:
 	void clear();
 
 	static String get_shader_type(const String &p_code);
-	Error compile(const String &p_code, const Map<StringName, FunctionInfo> &p_functions, const Vector<StringName> &p_render_modes, const Set<String> &p_shader_types, GlobalVariableGetTypeFunc p_global_variable_type_func);
-	Error complete(const String &p_code, const Map<StringName, FunctionInfo> &p_functions, const Vector<StringName> &p_render_modes, const Set<String> &p_shader_types, GlobalVariableGetTypeFunc p_global_variable_type_func, List<ScriptCodeCompletionOption> *r_options, String &r_call_hint);
+	Error compile(const String &p_code, const Map<StringName, FunctionInfo> &p_functions, const Vector<StringName> &p_render_modes, const VaryingFunctionNames &p_varying_function_names, const Set<String> &p_shader_types, GlobalVariableGetTypeFunc p_global_variable_type_func);
+	Error complete(const String &p_code, const Map<StringName, FunctionInfo> &p_functions, const Vector<StringName> &p_render_modes, const VaryingFunctionNames &p_varying_function_names, const Set<String> &p_shader_types, GlobalVariableGetTypeFunc p_global_variable_type_func, List<ScriptCodeCompletionOption> *r_options, String &r_call_hint);
 
 	String get_error_text();
 	int get_error_line();
diff --git a/servers/rendering/shader_types.cpp b/servers/rendering/shader_types.cpp
index e99b8504bb..460fd5fc97 100644
--- a/servers/rendering/shader_types.cpp
+++ b/servers/rendering/shader_types.cpp
@@ -74,6 +74,7 @@ ShaderTypes::ShaderTypes() {
 	shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["CUSTOM2"] = ShaderLanguage::TYPE_VEC4;
 	shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["CUSTOM3"] = ShaderLanguage::TYPE_VEC4;
 	shader_modes[RS::SHADER_SPATIAL].functions["vertex"].can_discard = false;
+	shader_modes[RS::SHADER_SPATIAL].functions["vertex"].main_function = true;
 
 	//builtins
 	shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["WORLD_MATRIX"] = ShaderLanguage::TYPE_MAT4;
@@ -139,6 +140,7 @@ ShaderTypes::ShaderTypes() {
 	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["RADIANCE"] = ShaderLanguage::TYPE_VEC4;
 	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["IRRADIANCE"] = ShaderLanguage::TYPE_VEC4;
 	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].can_discard = true;
+	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].main_function = true;
 
 	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["ALPHA_SCISSOR_THRESHOLD"] = ShaderLanguage::TYPE_FLOAT;
 	shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["ALPHA_HASH_SCALE"] = ShaderLanguage::TYPE_FLOAT;
@@ -171,6 +173,7 @@ ShaderTypes::ShaderTypes() {
 	shader_modes[RS::SHADER_SPATIAL].functions["light"].built_ins["ALPHA"] = ShaderLanguage::TYPE_FLOAT;
 
 	shader_modes[RS::SHADER_SPATIAL].functions["light"].can_discard = true;
+	shader_modes[RS::SHADER_SPATIAL].functions["light"].main_function = true;
 
 	//order used puts first enum mode (default) first
 	shader_modes[RS::SHADER_SPATIAL].modes.push_back("blend_mix");
@@ -236,6 +239,7 @@ ShaderTypes::ShaderTypes() {
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["vertex"].built_ins["AT_LIGHT_PASS"] = constt(ShaderLanguage::TYPE_BOOL);
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["vertex"].built_ins["TEXTURE_PIXEL_SIZE"] = constt(ShaderLanguage::TYPE_VEC2);
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["vertex"].can_discard = false;
+	shader_modes[RS::SHADER_CANVAS_ITEM].functions["vertex"].main_function = true;
 
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["fragment"].built_ins["VERTEX"] = ShaderLanguage::TYPE_VEC2;
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["fragment"].built_ins["SHADOW_VERTEX"] = ShaderLanguage::TYPE_VEC2;
@@ -257,6 +261,7 @@ ShaderTypes::ShaderTypes() {
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["fragment"].built_ins["AT_LIGHT_PASS"] = constt(ShaderLanguage::TYPE_BOOL);
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["fragment"].built_ins["SCREEN_TEXTURE"] = constt(ShaderLanguage::TYPE_SAMPLER2D);
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["fragment"].can_discard = true;
+	shader_modes[RS::SHADER_CANVAS_ITEM].functions["fragment"].main_function = true;
 
 	{
 		ShaderLanguage::StageFunctionInfo func;
@@ -294,6 +299,7 @@ ShaderTypes::ShaderTypes() {
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["light"].built_ins["TEXTURE_PIXEL_SIZE"] = constt(ShaderLanguage::TYPE_VEC2);
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["light"].built_ins["POINT_COORD"] = constt(ShaderLanguage::TYPE_VEC2);
 	shader_modes[RS::SHADER_CANVAS_ITEM].functions["light"].can_discard = true;
+	shader_modes[RS::SHADER_CANVAS_ITEM].functions["light"].main_function = true;
 
 	shader_modes[RS::SHADER_CANVAS_ITEM].modes.push_back("skip_vertex_transform");
 
@@ -310,34 +316,50 @@ ShaderTypes::ShaderTypes() {
 	/************ PARTICLES **************************/
 
 	shader_modes[RS::SHADER_PARTICLES].functions["global"].built_ins["TIME"] = constt(ShaderLanguage::TYPE_FLOAT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["COLOR"] = ShaderLanguage::TYPE_VEC4;
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["VELOCITY"] = ShaderLanguage::TYPE_VEC3;
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["MASS"] = ShaderLanguage::TYPE_FLOAT;
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["ACTIVE"] = ShaderLanguage::TYPE_BOOL;
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["RESTART"] = constt(ShaderLanguage::TYPE_BOOL);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["CUSTOM"] = ShaderLanguage::TYPE_VEC4;
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["TRANSFORM"] = ShaderLanguage::TYPE_MAT4;
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["LIFETIME"] = constt(ShaderLanguage::TYPE_FLOAT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["DELTA"] = constt(ShaderLanguage::TYPE_FLOAT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["NUMBER"] = constt(ShaderLanguage::TYPE_UINT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["INDEX"] = constt(ShaderLanguage::TYPE_INT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["EMISSION_TRANSFORM"] = constt(ShaderLanguage::TYPE_MAT4);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["RANDOM_SEED"] = constt(ShaderLanguage::TYPE_UINT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["FLAG_EMIT_POSITION"] = constt(ShaderLanguage::TYPE_UINT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["FLAG_EMIT_ROT_SCALE"] = constt(ShaderLanguage::TYPE_UINT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["FLAG_EMIT_VELOCITY"] = constt(ShaderLanguage::TYPE_UINT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["FLAG_EMIT_COLOR"] = constt(ShaderLanguage::TYPE_UINT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["FLAG_EMIT_CUSTOM"] = constt(ShaderLanguage::TYPE_UINT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["RESTART_POSITION"] = constt(ShaderLanguage::TYPE_BOOL);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["RESTART_ROT_SCALE"] = constt(ShaderLanguage::TYPE_BOOL);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["RESTART_VELOCITY"] = constt(ShaderLanguage::TYPE_BOOL);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["RESTART_COLOR"] = constt(ShaderLanguage::TYPE_BOOL);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["RESTART_CUSTOM"] = constt(ShaderLanguage::TYPE_BOOL);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["COLLIDED"] = constt(ShaderLanguage::TYPE_BOOL);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["COLLISION_NORMAL"] = constt(ShaderLanguage::TYPE_VEC3);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["COLLISION_DEPTH"] = constt(ShaderLanguage::TYPE_FLOAT);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].built_ins["ATTRACTOR_FORCE"] = constt(ShaderLanguage::TYPE_VEC3);
-	shader_modes[RS::SHADER_PARTICLES].functions["compute"].can_discard = false;
+
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["COLOR"] = ShaderLanguage::TYPE_VEC4;
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["VELOCITY"] = ShaderLanguage::TYPE_VEC3;
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["MASS"] = ShaderLanguage::TYPE_FLOAT;
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["ACTIVE"] = ShaderLanguage::TYPE_BOOL;
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["RESTART"] = constt(ShaderLanguage::TYPE_BOOL);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["CUSTOM"] = ShaderLanguage::TYPE_VEC4;
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["TRANSFORM"] = ShaderLanguage::TYPE_MAT4;
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["LIFETIME"] = constt(ShaderLanguage::TYPE_FLOAT);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["DELTA"] = constt(ShaderLanguage::TYPE_FLOAT);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["NUMBER"] = constt(ShaderLanguage::TYPE_UINT);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["INDEX"] = constt(ShaderLanguage::TYPE_INT);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["EMISSION_TRANSFORM"] = constt(ShaderLanguage::TYPE_MAT4);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["RANDOM_SEED"] = constt(ShaderLanguage::TYPE_UINT);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["RESTART_POSITION"] = constt(ShaderLanguage::TYPE_BOOL);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["RESTART_ROT_SCALE"] = constt(ShaderLanguage::TYPE_BOOL);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["RESTART_VELOCITY"] = constt(ShaderLanguage::TYPE_BOOL);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["RESTART_COLOR"] = constt(ShaderLanguage::TYPE_BOOL);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["RESTART_CUSTOM"] = constt(ShaderLanguage::TYPE_BOOL);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].main_function = true;
+
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["COLOR"] = ShaderLanguage::TYPE_VEC4;
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["VELOCITY"] = ShaderLanguage::TYPE_VEC3;
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["MASS"] = ShaderLanguage::TYPE_FLOAT;
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["ACTIVE"] = ShaderLanguage::TYPE_BOOL;
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["RESTART"] = constt(ShaderLanguage::TYPE_BOOL);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["CUSTOM"] = ShaderLanguage::TYPE_VEC4;
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["TRANSFORM"] = ShaderLanguage::TYPE_MAT4;
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["LIFETIME"] = constt(ShaderLanguage::TYPE_FLOAT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["DELTA"] = constt(ShaderLanguage::TYPE_FLOAT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["NUMBER"] = constt(ShaderLanguage::TYPE_UINT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["INDEX"] = constt(ShaderLanguage::TYPE_INT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["EMISSION_TRANSFORM"] = constt(ShaderLanguage::TYPE_MAT4);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["RANDOM_SEED"] = constt(ShaderLanguage::TYPE_UINT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["FLAG_EMIT_POSITION"] = constt(ShaderLanguage::TYPE_UINT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["FLAG_EMIT_ROT_SCALE"] = constt(ShaderLanguage::TYPE_UINT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["FLAG_EMIT_VELOCITY"] = constt(ShaderLanguage::TYPE_UINT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["FLAG_EMIT_COLOR"] = constt(ShaderLanguage::TYPE_UINT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["FLAG_EMIT_CUSTOM"] = constt(ShaderLanguage::TYPE_UINT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["COLLIDED"] = constt(ShaderLanguage::TYPE_BOOL);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["COLLISION_NORMAL"] = constt(ShaderLanguage::TYPE_VEC3);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["COLLISION_DEPTH"] = constt(ShaderLanguage::TYPE_FLOAT);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].built_ins["ATTRACTOR_FORCE"] = constt(ShaderLanguage::TYPE_VEC3);
+	shader_modes[RS::SHADER_PARTICLES].functions["process"].main_function = true;
 
 	{
 		ShaderLanguage::StageFunctionInfo emit_vertex_func;
@@ -347,7 +369,7 @@ ShaderTypes::ShaderTypes() {
 		emit_vertex_func.arguments.push_back(ShaderLanguage::StageFunctionInfo::Argument("custom", ShaderLanguage::TYPE_VEC4));
 		emit_vertex_func.arguments.push_back(ShaderLanguage::StageFunctionInfo::Argument("flags", ShaderLanguage::TYPE_UINT));
 		emit_vertex_func.return_type = ShaderLanguage::TYPE_BOOL; //whether it could emit
-		shader_modes[RS::SHADER_PARTICLES].functions["compute"].stage_functions["emit_subparticle"] = emit_vertex_func;
+		shader_modes[RS::SHADER_PARTICLES].functions["process"].stage_functions["emit_subparticle"] = emit_vertex_func;
 	}
 
 	shader_modes[RS::SHADER_PARTICLES].modes.push_back("collision_use_scale");
@@ -384,14 +406,15 @@ ShaderTypes::ShaderTypes() {
 	shader_modes[RS::SHADER_SKY].functions["global"].built_ins["LIGHT3_COLOR"] = constt(ShaderLanguage::TYPE_VEC3);
 	shader_modes[RS::SHADER_SKY].functions["global"].built_ins["LIGHT3_SIZE"] = constt(ShaderLanguage::TYPE_FLOAT);
 
-	shader_modes[RS::SHADER_SKY].functions["fragment"].built_ins["COLOR"] = ShaderLanguage::TYPE_VEC3;
-	shader_modes[RS::SHADER_SKY].functions["fragment"].built_ins["ALPHA"] = ShaderLanguage::TYPE_FLOAT;
-	shader_modes[RS::SHADER_SKY].functions["fragment"].built_ins["EYEDIR"] = constt(ShaderLanguage::TYPE_VEC3);
-	shader_modes[RS::SHADER_SKY].functions["fragment"].built_ins["SCREEN_UV"] = constt(ShaderLanguage::TYPE_VEC2);
-	shader_modes[RS::SHADER_SKY].functions["fragment"].built_ins["SKY_COORDS"] = constt(ShaderLanguage::TYPE_VEC2);
-	shader_modes[RS::SHADER_SKY].functions["fragment"].built_ins["HALF_RES_COLOR"] = constt(ShaderLanguage::TYPE_VEC4);
-	shader_modes[RS::SHADER_SKY].functions["fragment"].built_ins["QUARTER_RES_COLOR"] = constt(ShaderLanguage::TYPE_VEC4);
-	shader_modes[RS::SHADER_SKY].functions["fragment"].built_ins["FOG"] = ShaderLanguage::TYPE_VEC4;
+	shader_modes[RS::SHADER_SKY].functions["sky"].built_ins["COLOR"] = ShaderLanguage::TYPE_VEC3;
+	shader_modes[RS::SHADER_SKY].functions["sky"].built_ins["ALPHA"] = ShaderLanguage::TYPE_FLOAT;
+	shader_modes[RS::SHADER_SKY].functions["sky"].built_ins["EYEDIR"] = constt(ShaderLanguage::TYPE_VEC3);
+	shader_modes[RS::SHADER_SKY].functions["sky"].built_ins["SCREEN_UV"] = constt(ShaderLanguage::TYPE_VEC2);
+	shader_modes[RS::SHADER_SKY].functions["sky"].built_ins["SKY_COORDS"] = constt(ShaderLanguage::TYPE_VEC2);
+	shader_modes[RS::SHADER_SKY].functions["sky"].built_ins["HALF_RES_COLOR"] = constt(ShaderLanguage::TYPE_VEC4);
+	shader_modes[RS::SHADER_SKY].functions["sky"].built_ins["QUARTER_RES_COLOR"] = constt(ShaderLanguage::TYPE_VEC4);
+	shader_modes[RS::SHADER_SKY].functions["sky"].built_ins["FOG"] = ShaderLanguage::TYPE_VEC4;
+	shader_modes[RS::SHADER_SKY].functions["sky"].main_function = true;
 
 	shader_modes[RS::SHADER_SKY].modes.push_back("use_half_res_pass");
 	shader_modes[RS::SHADER_SKY].modes.push_back("use_quarter_res_pass");
diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp
index 809343114c..f8644b5ecb 100644
--- a/servers/rendering_server.cpp
+++ b/servers/rendering_server.cpp
@@ -1706,7 +1706,7 @@ void RenderingServer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("instance_set_transform", "instance", "transform"), &RenderingServer::instance_set_transform);
 	ClassDB::bind_method(D_METHOD("instance_attach_object_instance_id", "instance", "id"), &RenderingServer::instance_attach_object_instance_id);
 	ClassDB::bind_method(D_METHOD("instance_set_blend_shape_weight", "instance", "shape", "weight"), &RenderingServer::instance_set_blend_shape_weight);
-	ClassDB::bind_method(D_METHOD("instance_set_surface_material", "instance", "surface", "material"), &RenderingServer::instance_set_surface_material);
+	ClassDB::bind_method(D_METHOD("instance_set_surface_override_material", "instance", "surface", "material"), &RenderingServer::instance_set_surface_override_material);
 	ClassDB::bind_method(D_METHOD("instance_set_visible", "instance", "visible"), &RenderingServer::instance_set_visible);
 	//	ClassDB::bind_method(D_METHOD("instance_set_use_lightmap", "instance", "lightmap_instance", "lightmap"), &RenderingServer::instance_set_use_lightmap);
 	ClassDB::bind_method(D_METHOD("instance_set_custom_aabb", "instance", "aabb"), &RenderingServer::instance_set_custom_aabb);
diff --git a/servers/rendering_server.h b/servers/rendering_server.h
index 6a8bb83ec1..694fae7fde 100644
--- a/servers/rendering_server.h
+++ b/servers/rendering_server.h
@@ -1124,7 +1124,7 @@ public:
 	virtual void instance_set_transform(RID p_instance, const Transform &p_transform) = 0;
 	virtual void instance_attach_object_instance_id(RID p_instance, ObjectID p_id) = 0;
 	virtual void instance_set_blend_shape_weight(RID p_instance, int p_shape, float p_weight) = 0;
-	virtual void instance_set_surface_material(RID p_instance, int p_surface, RID p_material) = 0;
+	virtual void instance_set_surface_override_material(RID p_instance, int p_surface, RID p_material) = 0;
 	virtual void instance_set_visible(RID p_instance, bool p_visible) = 0;
 
 	virtual void instance_set_custom_aabb(RID p_instance, AABB aabb) = 0;
diff --git a/tests/test_shader_lang.cpp b/tests/test_shader_lang.cpp
index a023f35506..2169350c02 100644
--- a/tests/test_shader_lang.cpp
+++ b/tests/test_shader_lang.cpp
@@ -344,7 +344,7 @@ MainLoop *test() {
 	Set<String> types;
 	types.insert("spatial");
 
-	Error err = sl.compile(code, dt, rm, types, nullptr);
+	Error err = sl.compile(code, dt, rm, ShaderLanguage::VaryingFunctionNames(), types, nullptr);
 
 	if (err) {
 		print_line("Error at line: " + rtos(sl.get_error_line()) + ": " + sl.get_error_text());
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 6dca29e856..33ce2423d9 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -86,20 +86,20 @@ It is still possible to build against a system wide ENet but doing so
 will limit its functionality to IPv4 only.
 
 
-## etc2comp
+## etcpak
 
-- Upstream: https://github.com/google/etc2comp
-- Version: git (9cd0f9cae0f32338943699bb418107db61bb66f2, 2017)
-- License: Apache 2.0
+- Upstream: https://github.com/wolfpld/etcpak
+- Version: git (f27daea656ff77671580f838a889e33049430ebd, 2021)
+- License: BSD-3-Clause
 
 Files extracted from upstream source:
 
-- all .cpp and .h files in EtcLib/
-- README.md, LICENSE, AUTHORS
-
-Important: Some files have Godot-made changes.
-They are marked with `// -- GODOT start --` and `// -- GODOT end --`
-comments.
+- Only the files relevant for compression (i.e. `Process*.cpp` and their deps):
+  ```
+  Dither.{cpp,hpp} ForceInline.hpp Math.hpp ProcessCommon.hpp ProcessRGB.{cpp,hpp}
+  ProcessDxtc.{cpp,hpp} Tables.{cpp,hpp} Vector.hpp
+  ```
+- `AUTHORS.txt` and `LICENSE.txt`
 
 
 ## fonts
@@ -344,7 +344,7 @@ File extracted from upstream release tarball:
 ## meshoptimizer
 
 - Upstream: https://github.com/zeux/meshoptimizer
-- Version: git (e3f53f66e7a35b9b8764bee478589d79e34fa698, 2021)
+- Version: 0.16 (95893c0566646434dd675b708d293fcb2d526d08, 2021)
 - License: MIT
 
 Files extracted from upstream repository:
diff --git a/thirdparty/etc2comp/AUTHORS b/thirdparty/etc2comp/AUTHORS
deleted file mode 100644
index e78a7f4d21..0000000000
--- a/thirdparty/etc2comp/AUTHORS
+++ /dev/null
@@ -1,7 +0,0 @@
-# This is the list of Etc2Comp authors for copyright purposes.
-#
-# This does not necessarily list everyone who has contributed code, since in
-# some cases, their employer may be the copyright holder.  To see the full list
-# of contributors, see the revision history in source control.
-Google Inc.
-Blue Shift Inc.
diff --git a/thirdparty/etc2comp/Etc.cpp b/thirdparty/etc2comp/Etc.cpp
deleted file mode 100644
index a5ee706048..0000000000
--- a/thirdparty/etc2comp/Etc.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "EtcConfig.h"
-#include "Etc.h"
-#include "EtcFilter.h"
-
-#include <string.h>
-
-namespace Etc
-{
-	// ----------------------------------------------------------------------------------------------------
-	// C-style inteface to the encoder
-	//
-	void Encode(float *a_pafSourceRGBA,
-				unsigned int a_uiSourceWidth, 
-				unsigned int a_uiSourceHeight,
-				Image::Format a_format,
-				ErrorMetric a_eErrMetric,
-				float a_fEffort,
-				unsigned int a_uiJobs,
-				unsigned int a_uiMaxJobs,
-				unsigned char **a_ppaucEncodingBits,
-				unsigned int *a_puiEncodingBitsBytes,
-				unsigned int *a_puiExtendedWidth,
-				unsigned int *a_puiExtendedHeight, 
-				int *a_piEncodingTime_ms, bool a_bVerboseOutput)
-	{
-
-		Image image(a_pafSourceRGBA, a_uiSourceWidth,
-					a_uiSourceHeight,
-					a_eErrMetric);
-		image.m_bVerboseOutput = a_bVerboseOutput;
-		image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs);
-
-		*a_ppaucEncodingBits = image.GetEncodingBits();
-		*a_puiEncodingBitsBytes = image.GetEncodingBitsBytes();
-		*a_puiExtendedWidth = image.GetExtendedWidth();
-		*a_puiExtendedHeight = image.GetExtendedHeight();
-		*a_piEncodingTime_ms = image.GetEncodingTimeMs();
-	}
-
-	void EncodeMipmaps(float *a_pafSourceRGBA,
-		unsigned int a_uiSourceWidth,
-		unsigned int a_uiSourceHeight,
-		Image::Format a_format,
-		ErrorMetric a_eErrMetric,
-		float a_fEffort,
-		unsigned int a_uiJobs,
-		unsigned int a_uiMaxJobs,
-		unsigned int a_uiMaxMipmaps,
-		unsigned int a_uiMipFilterFlags,
-		RawImage* a_pMipmapImages,
-		int *a_piEncodingTime_ms, 
-		bool a_bVerboseOutput)
-	{
-		auto mipWidth = a_uiSourceWidth;
-		auto mipHeight = a_uiSourceHeight;
-		int totalEncodingTime = 0;
-		for(unsigned int mip = 0; mip < a_uiMaxMipmaps && mipWidth >= 1 && mipHeight >= 1; mip++)
-		{
-			float* pImageData = nullptr;
-			float* pMipImage = nullptr;
-
-			if(mip == 0)
-			{
-				pImageData = a_pafSourceRGBA;
-			}
-			else
-			{
-				pMipImage = new float[mipWidth*mipHeight*4];
-				if(FilterTwoPass(a_pafSourceRGBA, a_uiSourceWidth, a_uiSourceHeight, pMipImage, mipWidth, mipHeight, a_uiMipFilterFlags, Etc::FilterLanczos3) )
-				{
-					pImageData = pMipImage;
-				}
-			}
-
-			if ( pImageData )
-			{
-			
-				Image image(pImageData, mipWidth, mipHeight,	a_eErrMetric);
-
-			image.m_bVerboseOutput = a_bVerboseOutput;
-			image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs);
-
-			a_pMipmapImages[mip].paucEncodingBits = std::shared_ptr<unsigned char>(image.GetEncodingBits(), [](unsigned char *p) { delete[] p; });
-			a_pMipmapImages[mip].uiEncodingBitsBytes = image.GetEncodingBitsBytes();
-			a_pMipmapImages[mip].uiExtendedWidth = image.GetExtendedWidth();
-			a_pMipmapImages[mip].uiExtendedHeight = image.GetExtendedHeight();
-
-			totalEncodingTime += image.GetEncodingTimeMs();
-			}
-
-			if(pMipImage)
-			{
-				delete[] pMipImage;
-			}
-
-			if (!pImageData)
-			{
-				break;
-			}
-
-			mipWidth >>= 1;
-			mipHeight >>= 1;
-		}
-
-		*a_piEncodingTime_ms = totalEncodingTime;
-	}
-
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-}
diff --git a/thirdparty/etc2comp/Etc.h b/thirdparty/etc2comp/Etc.h
deleted file mode 100644
index 439388d649..0000000000
--- a/thirdparty/etc2comp/Etc.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcConfig.h"
-#include "EtcImage.h"
-#include "EtcColor.h"
-#include "EtcErrorMetric.h"
-#include <memory>
-
-#define ETCCOMP_MIN_EFFORT_LEVEL (0.0f)
-#define ETCCOMP_DEFAULT_EFFORT_LEVEL (40.0f)
-#define ETCCOMP_MAX_EFFORT_LEVEL (100.0f)
-
-namespace Etc
-{
-	class Block4x4EncodingBits;
-
-	struct RawImage
-	{
-		int uiExtendedWidth;
-		int uiExtendedHeight;
-		unsigned int uiEncodingBitsBytes;
-		std::shared_ptr<unsigned char> paucEncodingBits;
-	};
-
-
-
-	// C-style inteface to the encoder
-	void Encode(float *a_pafSourceRGBA,
-				unsigned int a_uiSourceWidth,
-				unsigned int a_uiSourceHeight,
-				Image::Format a_format,
-				ErrorMetric a_eErrMetric,
-				float a_fEffort,
-				unsigned int a_uiJobs,
-				unsigned int a_uimaxJobs,
-				unsigned char **a_ppaucEncodingBits,
-				unsigned int *a_puiEncodingBitsBytes,
-				unsigned int *a_puiExtendedWidth,
-				unsigned int *a_puiExtendedHeight,
-				int *a_piEncodingTime_ms, bool a_bVerboseOutput = false);
-
-	void EncodeMipmaps(float *a_pafSourceRGBA,
-		unsigned int a_uiSourceWidth,
-		unsigned int a_uiSourceHeight,
-		Image::Format a_format,
-		ErrorMetric a_eErrMetric,
-		float a_fEffort,
-		unsigned int a_uiJobs,
-		unsigned int a_uiMaxJobs,
-		unsigned int a_uiMaxMipmaps,
-		unsigned int a_uiMipFilterFlags,
-		RawImage* a_pMipmaps,
-		int *a_piEncodingTime_ms, bool a_bVerboseOutput = false);
-
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4.cpp b/thirdparty/etc2comp/EtcBlock4x4.cpp
deleted file mode 100644
index 3082fe60db..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4.cpp
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* 
-EtcBlock4x4.cpp
-
-Implements the state associated with each 4x4 block of pixels in an image
-
-Source images that are not a multiple of 4x4 are extended to fill the Block4x4 using pixels with an 
-alpha of NAN
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcColor.h"
-#include "EtcImage.h"
-#include "EtcColorFloatRGBA.h"
-#include "EtcBlock4x4Encoding_RGB8.h"
-#include "EtcBlock4x4Encoding_RGBA8.h"
-#include "EtcBlock4x4Encoding_RGB8A1.h"
-#include "EtcBlock4x4Encoding_R11.h"
-#include "EtcBlock4x4Encoding_RG11.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-namespace Etc
-{
-	// ETC pixels are scanned vertically.  
-	// this mapping is for when someone wants to scan the ETC pixels horizontally
-	const unsigned int Block4x4::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4::Block4x4(void)
-	{
-		m_pimageSource = nullptr;
-		m_uiSourceH = 0;
-		m_uiSourceV = 0;
-
-		m_sourcealphamix = SourceAlphaMix::UNKNOWN;
-		m_boolBorderPixels = false;
-		m_boolPunchThroughPixels = false;
-
-		m_pencoding = nullptr;
-
-		m_errormetric = ErrorMetric::NUMERIC;
-
-	}
-	Block4x4::~Block4x4()
-	{
-		m_pimageSource = nullptr;
-		if (m_pencoding)
-		{
-			delete m_pencoding;
-			m_pencoding = nullptr;
-		}
-	}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding from a source image
-	// [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource
-	// a_paucEncodingBits is the place to store the final encoding
-	// a_errormetric is used for finding the best encoding
-	//
-	void Block4x4::InitFromSource(Image *a_pimageSource, 
-									unsigned int a_uiSourceH, unsigned int a_uiSourceV,
-									unsigned char *a_paucEncodingBits,
-									ErrorMetric a_errormetric)
-	{
-
-		Block4x4();
-
-		m_pimageSource = a_pimageSource;
-		m_uiSourceH = a_uiSourceH;
-		m_uiSourceV = a_uiSourceV;
-		m_errormetric = a_errormetric;
-
-		SetSourcePixels();
-
-		// set block encoder function
-		switch (m_pimageSource->GetFormat())
-		{
-		case Image::Format::ETC1:
-			m_pencoding = new Block4x4Encoding_ETC1;
-			break;
-
-		case Image::Format::RGB8:
-		case Image::Format::SRGB8:
-			m_pencoding = new Block4x4Encoding_RGB8;
-			break;
-
-		case Image::Format::RGBA8:
-		case Image::Format::SRGBA8:
-			if (a_errormetric == RGBX)
-			{
-				m_pencoding = new Block4x4Encoding_RGBA8;
-			}
-			else
-			{
-				switch (m_sourcealphamix)
-				{
-				case SourceAlphaMix::OPAQUE:
-					m_pencoding = new Block4x4Encoding_RGBA8_Opaque;
-					break;
-
-				case SourceAlphaMix::TRANSPARENT:
-					m_pencoding = new Block4x4Encoding_RGBA8_Transparent;
-					break;
-
-				case SourceAlphaMix::TRANSLUCENT:
-					m_pencoding = new Block4x4Encoding_RGBA8;
-					break;
-
-				default:
-					assert(0);
-					break;
-				}
-				break;
-			}
-			break;
-
-		case Image::Format::RGB8A1:
-		case Image::Format::SRGB8A1:
-			switch (m_sourcealphamix)
-			{
-			case SourceAlphaMix::OPAQUE:
-				m_pencoding = new Block4x4Encoding_RGB8A1_Opaque;
-				break;
-
-			case SourceAlphaMix::TRANSPARENT:
-				m_pencoding = new Block4x4Encoding_RGB8A1_Transparent;
-				break;
-
-			case SourceAlphaMix::TRANSLUCENT:
-				if (m_boolPunchThroughPixels)
-				{
-					m_pencoding = new Block4x4Encoding_RGB8A1;
-				}
-				else
-				{
-					m_pencoding = new Block4x4Encoding_RGB8A1_Opaque;
-				}
-				break;
-
-			default:
-				assert(0);
-				break;
-			}
-			break;
-
-		case Image::Format::R11:
-		case Image::Format::SIGNED_R11:
-			m_pencoding = new Block4x4Encoding_R11;
-			break;
-		case Image::Format::RG11:
-		case Image::Format::SIGNED_RG11:
-			m_pencoding = new Block4x4Encoding_RG11;
-			break;
-		default:
-			assert(0);
-			break;
-		}
-
-		m_pencoding->InitFromSource(this, m_afrgbaSource,
-									a_paucEncodingBits, a_errormetric);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization of encoding state from a prior encoding using encoding bits
-	// [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource
-	// a_paucEncodingBits is the place to read the prior encoding
-	// a_imageformat is used to determine how to interpret a_paucEncodingBits
-	// a_errormetric was used for the prior encoding
-	//
-	void Block4x4::InitFromEtcEncodingBits(Image::Format a_imageformat,
-											unsigned int a_uiSourceH, unsigned int a_uiSourceV,
-											unsigned char *a_paucEncodingBits,
-											Image *a_pimageSource,
-											ErrorMetric a_errormetric)
-	{
-		Block4x4();
-
-		m_pimageSource = a_pimageSource;
-		m_uiSourceH = a_uiSourceH;
-		m_uiSourceV = a_uiSourceV;
-		m_errormetric = a_errormetric;
-
-		SetSourcePixels();
-
-		// set block encoder function
-		switch (a_imageformat)
-		{
-		case Image::Format::ETC1:
-			m_pencoding = new Block4x4Encoding_ETC1;
-			break;
-
-		case Image::Format::RGB8:
-		case Image::Format::SRGB8:
-			m_pencoding = new Block4x4Encoding_RGB8;
-			break;
-
-		case Image::Format::RGBA8:
-		case Image::Format::SRGBA8:
-			m_pencoding = new Block4x4Encoding_RGBA8;
-			break;
-
-		case Image::Format::RGB8A1:
-		case Image::Format::SRGB8A1:
-			m_pencoding = new Block4x4Encoding_RGB8A1;
-			break;
-
-		case Image::Format::R11:
-		case Image::Format::SIGNED_R11:
-			m_pencoding = new Block4x4Encoding_R11;
-			break;
-		case Image::Format::RG11:
-		case Image::Format::SIGNED_RG11:
-			m_pencoding = new Block4x4Encoding_RG11;
-			break;
-		default:
-			assert(0);
-			break;
-		}
-
-		m_pencoding->InitFromEncodingBits(this, a_paucEncodingBits, m_afrgbaSource,
-										m_pimageSource->GetErrorMetric());
-
-	}
-	
-	// ----------------------------------------------------------------------------------------------------
-	// set source pixels from m_pimageSource
-	// set m_alphamix
-	//
-	void Block4x4::SetSourcePixels(void)
-	{
-
-		Image::Format imageformat = m_pimageSource->GetFormat();
-
-		// alpha census
-		unsigned int uiTransparentSourcePixels = 0;
-		unsigned int uiOpaqueSourcePixels = 0;
-
-		// copy source to consecutive memory locations
-		// convert from image horizontal scan to block vertical scan
-		unsigned int uiPixel = 0;
-		for (unsigned int uiBlockPixelH = 0; uiBlockPixelH < Block4x4::COLUMNS; uiBlockPixelH++)
-		{
-			unsigned int uiSourcePixelH = m_uiSourceH + uiBlockPixelH;
-
-			for (unsigned int uiBlockPixelV = 0; uiBlockPixelV < Block4x4::ROWS; uiBlockPixelV++)
-			{
-				unsigned int uiSourcePixelV = m_uiSourceV + uiBlockPixelV;
-
-				ColorFloatRGBA *pfrgbaSource = m_pimageSource->GetSourcePixel(uiSourcePixelH, uiSourcePixelV);
-
-				// if pixel extends beyond source image because of block padding
-				if (pfrgbaSource == nullptr)
-				{
-					m_afrgbaSource[uiPixel] = ColorFloatRGBA(0.0f, 0.0f, 0.0f, NAN);	// denotes border pixel
-					m_boolBorderPixels = true;
-					uiTransparentSourcePixels++;
-				}
-				else
-				{
-					//get teh current pixel data, and store some of the attributes
-					//before capping values to fit the encoder type
-					
-					m_afrgbaSource[uiPixel] = (*pfrgbaSource).ClampRGBA();
-
-					if (m_afrgbaSource[uiPixel].fA == 1.0f || m_errormetric == RGBX)
-					{
-						m_pimageSource->m_iNumOpaquePixels++;
-					}
-					else if (m_afrgbaSource[uiPixel].fA == 0.0f)
-					{
-						m_pimageSource->m_iNumTransparentPixels++;
-					}
-					else if(m_afrgbaSource[uiPixel].fA > 0.0f && m_afrgbaSource[uiPixel].fA < 1.0f)
-					{
-						m_pimageSource->m_iNumTranslucentPixels++;
-					}
-					else
-					{
-						m_pimageSource->m_numOutOfRangeValues.fA++;
-					}
-
-					if (m_afrgbaSource[uiPixel].fR != 0.0f)
-					{
-						m_pimageSource->m_numColorValues.fR++;
-						//make sure we are getting a float between 0-1
-						if (m_afrgbaSource[uiPixel].fR - 1.0f > 0.0f)
-						{
-							m_pimageSource->m_numOutOfRangeValues.fR++;
-						}
-					}
-
-					if (m_afrgbaSource[uiPixel].fG != 0.0f)
-					{
-						m_pimageSource->m_numColorValues.fG++;
-						if (m_afrgbaSource[uiPixel].fG - 1.0f > 0.0f)
-						{
-							m_pimageSource->m_numOutOfRangeValues.fG++;
-						}
-					}
-					if (m_afrgbaSource[uiPixel].fB != 0.0f)
-					{
-						m_pimageSource->m_numColorValues.fB++;
-						if (m_afrgbaSource[uiPixel].fB - 1.0f > 0.0f)
-						{
-							m_pimageSource->m_numOutOfRangeValues.fB++;
-						}
-					}
-					// for formats with no alpha, set source alpha to 1
-					if (imageformat == Image::Format::ETC1 ||
-						imageformat == Image::Format::RGB8 ||
-						imageformat == Image::Format::SRGB8)
-					{
-						m_afrgbaSource[uiPixel].fA = 1.0f;
-					}
-
-					if (imageformat == Image::Format::R11 ||
-						imageformat == Image::Format::SIGNED_R11)
-					{
-						m_afrgbaSource[uiPixel].fA = 1.0f;
-						m_afrgbaSource[uiPixel].fG = 0.0f;
-						m_afrgbaSource[uiPixel].fB = 0.0f;
-					}
-
-					if (imageformat == Image::Format::RG11 ||
-						imageformat == Image::Format::SIGNED_RG11)
-					{
-						m_afrgbaSource[uiPixel].fA = 1.0f;
-						m_afrgbaSource[uiPixel].fB = 0.0f;
-					}
-
-				
-					// for RGB8A1, set source alpha to 0.0 or 1.0
-					// set punch through flag
-					if (imageformat == Image::Format::RGB8A1 ||
-						imageformat == Image::Format::SRGB8A1)
-					{
-						if (m_afrgbaSource[uiPixel].fA >= 0.5f)
-						{
-							m_afrgbaSource[uiPixel].fA = 1.0f;
-						}
-						else
-						{
-							m_afrgbaSource[uiPixel].fA = 0.0f;
-							m_boolPunchThroughPixels = true;
-						}
-					}
-
-					if (m_afrgbaSource[uiPixel].fA == 1.0f || m_errormetric == RGBX)
-					{
-						uiOpaqueSourcePixels++;
-					}
-					else if (m_afrgbaSource[uiPixel].fA == 0.0f)
-					{
-						uiTransparentSourcePixels++;
-					}
-
-				}
-
-				uiPixel += 1;
-			}
-		}
-
-		if (uiOpaqueSourcePixels == PIXELS)
-		{
-			m_sourcealphamix = SourceAlphaMix::OPAQUE;
-		}
-		else if (uiTransparentSourcePixels == PIXELS)
-		{
-			m_sourcealphamix = SourceAlphaMix::TRANSPARENT;
-		}
-		else
-		{
-			m_sourcealphamix = SourceAlphaMix::TRANSLUCENT;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// return a name for the encoding mode
-	//
-	const char * Block4x4::GetEncodingModeName(void)
-	{
-
-		switch (m_pencoding->GetMode())
-		{
-		case Block4x4Encoding::MODE_ETC1:
-			return "ETC1";
-		case Block4x4Encoding::MODE_T:
-			return "T";
-		case Block4x4Encoding::MODE_H:
-			return "H";
-		case Block4x4Encoding::MODE_PLANAR:
-			return "PLANAR";
-		default:
-			return "???";
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4.h b/thirdparty/etc2comp/EtcBlock4x4.h
deleted file mode 100644
index 0fd30c598d..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColor.h"
-#include "EtcColorFloatRGBA.h"
-#include "EtcErrorMetric.h"
-#include "EtcImage.h"
-#include "EtcBlock4x4Encoding.h"
-
-namespace Etc
-{
-	class Block4x4EncodingBits;
-
-	class Block4x4
-	{
-	public:
-
-		static const unsigned int ROWS = 4;
-		static const unsigned int COLUMNS = 4;
-		static const unsigned int PIXELS = ROWS * COLUMNS;
-
-		// the alpha mix for a 4x4 block of pixels
-		enum class SourceAlphaMix
-		{
-			UNKNOWN,
-			//
-			OPAQUE,			// all 1.0
-			TRANSPARENT,	// all 0.0 or NAN
-			TRANSLUCENT		// not all opaque or transparent
-		};
-
-		typedef void (Block4x4::*EncoderFunctionPtr)(void);
-
-		Block4x4(void);
-		~Block4x4();
-		void InitFromSource(Image *a_pimageSource,
-							unsigned int a_uiSourceH,
-							unsigned int a_uiSourceV,
-							unsigned char *a_paucEncodingBits,
-							ErrorMetric a_errormetric);
-
-		void InitFromEtcEncodingBits(Image::Format a_imageformat,
-										unsigned int a_uiSourceH,
-										unsigned int a_uiSourceV,
-										unsigned char *a_paucEncodingBits,
-										Image *a_pimageSource,
-										ErrorMetric a_errormetric);
-
-		// return true if final iteration was performed
-		inline void PerformEncodingIteration(float a_fEffort)
-		{
-			m_pencoding->PerformIteration(a_fEffort);
-		}
-
-		inline void SetEncodingBitsFromEncoding(void)
-		{
-			m_pencoding->SetEncodingBits();
-		}
-
-		inline unsigned int GetSourceH(void)
-		{
-			return m_uiSourceH;
-		}
-
-		inline unsigned int GetSourceV(void)
-		{
-			return m_uiSourceV;
-		}
-
-		inline float GetError(void)
-		{
-			return m_pencoding->GetError();
-		}
-
-		static const unsigned int s_auiPixelOrderHScan[PIXELS];
-
-		inline ColorFloatRGBA * GetDecodedColors(void)
-		{
-			return m_pencoding->GetDecodedColors();
-		}
-
-		inline float * GetDecodedAlphas(void)
-		{
-			return m_pencoding->GetDecodedAlphas();
-		}
-
-		inline Block4x4Encoding::Mode GetEncodingMode(void)
-		{
-			return m_pencoding->GetMode();
-		}
-
-		inline bool GetFlip(void)
-		{
-			return m_pencoding->GetFlip();
-		}
-
-		inline bool IsDifferential(void)
-		{
-			return m_pencoding->IsDifferential();
-		}
-
-		inline ColorFloatRGBA * GetSource()
-		{
-			return m_afrgbaSource;
-		}
-
-		inline ErrorMetric GetErrorMetric()
-		{
-			return m_errormetric;
-		}
-
-		const char * GetEncodingModeName(void);
-
-		inline Block4x4Encoding * GetEncoding(void)
-		{
-			return m_pencoding;
-		}
-
-		inline SourceAlphaMix GetSourceAlphaMix(void)
-		{
-			return m_sourcealphamix;
-		}
-
-		inline Image * GetImageSource(void)
-		{
-			return m_pimageSource;
-		}
-
-		inline bool HasBorderPixels(void)
-		{
-			return m_boolBorderPixels;
-		}
-
-		inline bool HasPunchThroughPixels(void)
-		{
-			return m_boolPunchThroughPixels;
-		}
-
-	private:
-
-		void SetSourcePixels(void);
-
-		Image				*m_pimageSource;
-		unsigned int		m_uiSourceH;
-		unsigned int		m_uiSourceV;
-		ErrorMetric			m_errormetric;
-		ColorFloatRGBA		m_afrgbaSource[PIXELS];		// vertical scan
-
-		SourceAlphaMix		m_sourcealphamix;
-		bool				m_boolBorderPixels;			// marked as rgba(NAN, NAN, NAN, NAN)
-		bool				m_boolPunchThroughPixels;	// RGB8A1 or SRGB8A1 with any pixels with alpha < 0.5
-
-		Block4x4Encoding	*m_pencoding;
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding.cpp
deleted file mode 100644
index 7a9e68c4cf..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding.cpp
-
-Block4x4Encoding is the abstract base class for the different encoders.  Each encoder targets a 
-particular file format (e.g. ETC1, RGB8, RGBA8, R11)
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-namespace Etc
-{
-	// ----------------------------------------------------------------------------------------------------
-	//
-	const float Block4x4Encoding::LUMA_WEIGHT = 3.0f;
-	const float Block4x4Encoding::CHROMA_BLUE_WEIGHT = 0.5f;
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding::Block4x4Encoding(void)
-	{
-
-		m_pblockParent = nullptr;
-
-		m_pafrgbaSource = nullptr;
-
-		m_boolBorderPixels = false;
-
-		m_fError = -1.0f;
-
-		m_mode = MODE_UNKNOWN;
-
-		m_uiEncodingIterations = 0;
-		m_boolDone = false;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(-1.0f, -1.0f, -1.0f, -1.0f);
-			m_afDecodedAlphas[uiPixel] = -1.0f;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialize the generic encoding for a 4x4 block
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// init the decoded pixels to -1 to mark them as undefined
-	// init the error to -1 to mark it as undefined
-	//
-	void Block4x4Encoding::Init(Block4x4 *a_pblockParent,
-								ColorFloatRGBA *a_pafrgbaSource,
-								ErrorMetric a_errormetric)
-	{
-
-		m_pblockParent = a_pblockParent;
-
-		m_pafrgbaSource = a_pafrgbaSource;
-
-		m_boolBorderPixels = m_pblockParent->HasBorderPixels();
-
-		m_fError = -1.0f;
-
-		m_uiEncodingIterations = 0;
-
-		m_errormetric = a_errormetric;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(-1.0f, -1.0f, -1.0f, -1.0f);
-			m_afDecodedAlphas[uiPixel] = -1.0f;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate the error for the block by summing the pixel errors
-	//
-	void Block4x4Encoding::CalcBlockError(void)
-	{
-		m_fError = 0.0f;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_fError += CalcPixelError(m_afrgbaDecodedColors[uiPixel], m_afDecodedAlphas[uiPixel],
-										m_pafrgbaSource[uiPixel]);
-		}
-		
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate the error between the source pixel and the decoded pixel
-	// the error amount is base on the error metric
-	//
-	float Block4x4Encoding::CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha,
-											ColorFloatRGBA a_frgbaSourcePixel)
-	{
-
-		// if a border pixel
-		if (isnan(a_frgbaSourcePixel.fA))
-		{
-			return 0.0f;
-		}
-
-		if (m_errormetric == ErrorMetric::RGBA)
-		{
-			assert(a_fDecodedAlpha >= 0.0f);
-
-			float fDRed = (a_fDecodedAlpha * a_frgbaDecodedColor.fR) -
-							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fR);
-			float fDGreen = (a_fDecodedAlpha * a_frgbaDecodedColor.fG) -
-							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fG);
-			float fDBlue = (a_fDecodedAlpha * a_frgbaDecodedColor.fB) -
-							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fB);
-
-			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
-
-			return fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue + fDAlpha*fDAlpha;
-		}
-		else if (m_errormetric == ErrorMetric::RGBX)
-		{
-			assert(a_fDecodedAlpha >= 0.0f);
-
-			float fDRed = a_frgbaDecodedColor.fR - a_frgbaSourcePixel.fR;
-			float fDGreen = a_frgbaDecodedColor.fG - a_frgbaSourcePixel.fG;
-			float fDBlue = a_frgbaDecodedColor.fB - a_frgbaSourcePixel.fB;
-			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
-
-			return fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue + fDAlpha*fDAlpha;
-		}
-		else if (m_errormetric == ErrorMetric::REC709)
-		{
-			assert(a_fDecodedAlpha >= 0.0f);
-
-			float fLuma1 = a_frgbaSourcePixel.fR*0.2126f + a_frgbaSourcePixel.fG*0.7152f + a_frgbaSourcePixel.fB*0.0722f;
-			float fChromaR1 = 0.5f * ((a_frgbaSourcePixel.fR - fLuma1) * (1.0f / (1.0f - 0.2126f)));
-			float fChromaB1 = 0.5f * ((a_frgbaSourcePixel.fB - fLuma1) * (1.0f / (1.0f - 0.0722f)));
-
-			float fLuma2 = a_frgbaDecodedColor.fR*0.2126f +
-							a_frgbaDecodedColor.fG*0.7152f +
-							a_frgbaDecodedColor.fB*0.0722f;
-			float fChromaR2 = 0.5f * ((a_frgbaDecodedColor.fR - fLuma2) * (1.0f / (1.0f - 0.2126f)));
-			float fChromaB2 = 0.5f * ((a_frgbaDecodedColor.fB - fLuma2) * (1.0f / (1.0f - 0.0722f)));
-
-			float fDeltaL = a_frgbaSourcePixel.fA * fLuma1 - a_fDecodedAlpha * fLuma2;
-			float fDeltaCr = a_frgbaSourcePixel.fA * fChromaR1 - a_fDecodedAlpha * fChromaR2;
-			float fDeltaCb = a_frgbaSourcePixel.fA * fChromaB1 - a_fDecodedAlpha * fChromaB2;
-
-			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
-
-			// Favor Luma accuracy over Chroma, and Red over Blue 
-			return LUMA_WEIGHT*fDeltaL*fDeltaL +
-					fDeltaCr*fDeltaCr +
-					CHROMA_BLUE_WEIGHT*fDeltaCb*fDeltaCb +
-					fDAlpha*fDAlpha;
-	#if 0
-			float fDRed = a_frgbaDecodedPixel.fR - a_frgbaSourcePixel.fR;
-			float fDGreen = a_frgbaDecodedPixel.fG - a_frgbaSourcePixel.fG;
-			float fDBlue = a_frgbaDecodedPixel.fB - a_frgbaSourcePixel.fB;
-			return 2.0f * 3.0f * fDeltaL * fDeltaL + fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue;
-#endif
-		}
-		else if (m_errormetric == ErrorMetric::NORMALXYZ)
-		{
-			float fDecodedX = 2.0f * a_frgbaDecodedColor.fR - 1.0f;
-			float fDecodedY = 2.0f * a_frgbaDecodedColor.fG - 1.0f;
-			float fDecodedZ = 2.0f * a_frgbaDecodedColor.fB - 1.0f;
-
-			float fDecodedLength = sqrtf(fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ);
-
-			if (fDecodedLength < 0.5f)
-			{
-				return 1.0f;
-			}
-			else if (fDecodedLength == 0.0f)
-			{
-				fDecodedX = 1.0f;
-				fDecodedY = 0.0f;
-				fDecodedZ = 0.0f;
-			}
-			else
-			{
-				fDecodedX /= fDecodedLength;
-				fDecodedY /= fDecodedLength;
-				fDecodedZ /= fDecodedLength;
-			}
-
-			float fSourceX = 2.0f * a_frgbaSourcePixel.fR - 1.0f;
-			float fSourceY = 2.0f * a_frgbaSourcePixel.fG - 1.0f;
-			float fSourceZ = 2.0f * a_frgbaSourcePixel.fB - 1.0f;
-
-			float fSourceLength = sqrtf(fSourceX*fSourceX + fSourceY*fSourceY + fSourceZ*fSourceZ);
-
-			if (fSourceLength == 0.0f)
-			{
-				fSourceX = 1.0f;
-				fSourceY = 0.0f;
-				fSourceZ = 0.0f;
-			}
-			else
-			{
-				fSourceX /= fSourceLength;
-				fSourceY /= fSourceLength;
-				fSourceZ /= fSourceLength;
-			}
-
-			float fDotProduct = fSourceX*fDecodedX + fSourceY*fDecodedY + fSourceZ*fDecodedZ;
-			float fNormalizedDotProduct = 1.0f - 0.5f * (fDotProduct + 1.0f);
-			float fDotProductError = fNormalizedDotProduct * fNormalizedDotProduct;
-			
-			float fLength2 = fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ;
-			float fLength2Error = fabsf(1.0f - fLength2);
-
-			float fDeltaW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA;
-			float fErrorW = fDeltaW * fDeltaW;
-
-			return fDotProductError + fLength2Error + fErrorW;
-		}
-		else // ErrorMetric::NUMERIC
-		{
-			assert(a_fDecodedAlpha >= 0.0f);
-
-			float fDX = a_frgbaDecodedColor.fR - a_frgbaSourcePixel.fR;
-			float fDY = a_frgbaDecodedColor.fG - a_frgbaSourcePixel.fG;
-			float fDZ = a_frgbaDecodedColor.fB - a_frgbaSourcePixel.fB;
-			float fDW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA;
-
-			return fDX*fDX + fDY*fDY + fDZ*fDZ + fDW*fDW;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
-
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding.h b/thirdparty/etc2comp/EtcBlock4x4Encoding.h
deleted file mode 100644
index c14c3b8616..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColorFloatRGBA.h"
-
-#include "EtcErrorMetric.h"
-
-#include <assert.h>
-#include <float.h>
-
-namespace Etc
-{
-	class Block4x4;
-
-	// abstract base class for specific encodings
-	class Block4x4Encoding
-	{
-	public:
-
-		static const unsigned int ROWS = 4;
-		static const unsigned int COLUMNS = 4;
-		static const unsigned int PIXELS = ROWS * COLUMNS;
-		static const float LUMA_WEIGHT;
-		static const float CHROMA_BLUE_WEIGHT;
-
-		typedef enum
-		{
-			MODE_UNKNOWN,
-			//
-			MODE_ETC1,
-			MODE_T,
-			MODE_H,
-			MODE_PLANAR,
-			MODE_R11,
-			MODE_RG11,
-			//
-			MODES
-		} Mode;
-
-		Block4x4Encoding(void);
-		//virtual ~Block4x4Encoding(void) =0;
-		virtual ~Block4x4Encoding(void) {}
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-									ColorFloatRGBA *a_pafrgbaSource,
-
-									unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) = 0;
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource,
-
-											ErrorMetric a_errormetric) = 0;
-
-		// perform an iteration of the encoding
-		// the first iteration must generate a complete, valid (if poor) encoding
-		virtual void PerformIteration(float a_fEffort) = 0;
-
-		void CalcBlockError(void);
-
-		inline float GetError(void)
-		{
-			assert(m_fError >= 0.0f);
-
-			return m_fError;
-		}
-
-		inline ColorFloatRGBA * GetDecodedColors(void)
-		{
-			return m_afrgbaDecodedColors;
-		}
-
-		inline float * GetDecodedAlphas(void)
-		{
-			return m_afDecodedAlphas;
-		}
-
-		virtual void SetEncodingBits(void) = 0;
-
-		virtual bool GetFlip(void) = 0;
-
-		virtual bool IsDifferential(void) = 0;
-
-		virtual bool HasSeverelyBentDifferentialColors(void) const = 0;
-
-		inline Mode GetMode(void)
-		{
-			return m_mode;
-		}
-
-		inline bool IsDone(void)
-		{
-			return m_boolDone;
-		}
-
-		inline void SetDoneIfPerfect()
-		{
-			if (GetError() == 0.0f)
-			{
-				m_boolDone = true;
-			}
-		}
-
-		float CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha,
-								ColorFloatRGBA a_frgbaSourcePixel);
-
-	protected:
-
-		void Init(Block4x4 *a_pblockParent,
-					ColorFloatRGBA *a_pafrgbaSource,
-
-					ErrorMetric a_errormetric);
-
-		Block4x4		*m_pblockParent;
-		ColorFloatRGBA	*m_pafrgbaSource;
-
-		bool			m_boolBorderPixels;				// if block has any border pixels
-
-		ColorFloatRGBA	m_afrgbaDecodedColors[PIXELS];	// decoded RGB components, ignore Alpha
-		float			m_afDecodedAlphas[PIXELS];		// decoded alpha component
-		float			m_fError;						// error for RGBA relative to m_pafrgbaSource
-
-		// intermediate encoding
-		Mode			m_mode;
-
-		unsigned int	m_uiEncodingIterations;
-		bool			m_boolDone;						// all iterations have been done
-		ErrorMetric		m_errormetric;
-
-	private:
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4EncodingBits.h b/thirdparty/etc2comp/EtcBlock4x4EncodingBits.h
deleted file mode 100644
index 4065700379..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4EncodingBits.h
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ################################################################################
-	// Block4x4EncodingBits
-	// Base class for Block4x4EncodingBits_XXXX
-	// ################################################################################
-
-	class Block4x4EncodingBits
-	{
-	public:
-
-		enum class Format
-		{
-			UNKNOWN,
-			//
-			RGB8,
-			RGBA8,
-			R11,
-			RG11,
-			RGB8A1,
-			//
-			FORMATS
-		};
-
-		static unsigned int GetBytesPerBlock(Format a_format)
-		{
-			switch (a_format)
-			{
-			case Format::RGB8:
-			case Format::R11:
-			case Format::RGB8A1:
-				return 8;
-				break;
-
-			case Format::RGBA8:
-			case Format::RG11:
-				return 16;
-				break;
-
-			default:
-				return 0;
-				break;
-			}
-
-		}
-
-	};
-
-	// ################################################################################
-	// Block4x4EncodingBits_RGB8
-	// Encoding bits for the RGB portion of ETC1, RGB8, RGB8A1 and RGBA8
-	// ################################################################################
-
-	class Block4x4EncodingBits_RGB8
-	{
-	public:
-
-		static const unsigned int BYTES_PER_BLOCK = 8;
-
-		inline Block4x4EncodingBits_RGB8(void)
-		{
-			assert(sizeof(Block4x4EncodingBits_RGB8) == BYTES_PER_BLOCK);
-
-			for (unsigned int uiByte = 0; uiByte < BYTES_PER_BLOCK; uiByte++)
-			{
-				auc[uiByte] = 0;
-			}
-
-		}
-
-		typedef struct
-		{
-			unsigned red2 : 4;
-			unsigned red1 : 4;
-			//
-			unsigned green2 : 4;
-			unsigned green1 : 4;
-			//
-			unsigned blue2 : 4;
-			unsigned blue1 : 4;
-			//
-			unsigned flip : 1;
-			unsigned diff : 1;
-			unsigned cw2 : 3;
-			unsigned cw1 : 3;
-			//
-			unsigned int selectors;
-		} Individual;
-
-		typedef struct
-		{
-			signed dred2 : 3;
-			unsigned red1 : 5;
-			//
-			signed dgreen2 : 3;
-			unsigned green1 : 5;
-			//
-			signed dblue2 : 3;
-			unsigned blue1 : 5;
-			//
-			unsigned flip : 1;
-			unsigned diff : 1;
-			unsigned cw2 : 3;
-			unsigned cw1 : 3;
-			//
-			unsigned int selectors;
-		} Differential;
-
-		typedef struct
-		{
-			unsigned red1b : 2;
-			unsigned detect2 : 1;
-			unsigned red1a : 2;
-			unsigned detect1 : 3;
-			//
-			unsigned blue1 : 4;
-			unsigned green1 : 4;
-			//
-			unsigned green2 : 4;
-			unsigned red2 : 4;
-			//
-			unsigned db : 1;
-			unsigned diff : 1;
-			unsigned da : 2;
-			unsigned blue2 : 4;
-			//
-			unsigned int selectors;
-		} T;
-
-		typedef struct
-		{
-			unsigned green1a : 3;
-			unsigned red1 : 4;
-			unsigned detect1 : 1;
-			//
-			unsigned blue1b : 2;
-			unsigned detect3 : 1;
-			unsigned blue1a : 1;
-			unsigned green1b : 1;
-			unsigned detect2 : 3;
-			//
-			unsigned green2a : 3;
-			unsigned red2 : 4;
-			unsigned blue1c : 1;
-			//
-			unsigned db : 1;
-			unsigned diff : 1;
-			unsigned da : 1;
-			unsigned blue2 : 4;
-			unsigned green2b : 1;
-			//
-			unsigned int selectors;
-		} H;
-
-		typedef struct
-		{
-			unsigned originGreen1 : 1;
-			unsigned originRed : 6;
-			unsigned detect1 : 1;
-			//
-			unsigned originBlue1 : 1;
-			unsigned originGreen2 : 6;
-			unsigned detect2 : 1;
-			//
-			unsigned originBlue3 : 2;
-			unsigned detect4 : 1;
-			unsigned originBlue2 : 2;
-			unsigned detect3 : 3;
-			//
-			unsigned horizRed2 : 1;
-			unsigned diff : 1;
-			unsigned horizRed1 : 5;
-			unsigned originBlue4 : 1;
-			//
-			unsigned horizBlue1: 1;
-			unsigned horizGreen : 7;
-			//
-			unsigned vertRed1 : 3;
-			unsigned horizBlue2 : 5;
-			//
-			unsigned vertGreen1 : 5;
-			unsigned vertRed2 : 3;
-			//
-			unsigned vertBlue : 6;
-			unsigned vertGreen2 : 2;
-		} Planar;
-
-		union
-		{
-			unsigned char auc[BYTES_PER_BLOCK];
-			unsigned long int ul;
-			Individual individual;
-			Differential differential;
-			T t;
-			H h;
-			Planar planar;
-		};
-
-	};
-
-	// ################################################################################
-	// Block4x4EncodingBits_A8
-	// Encoding bits for the A portion of RGBA8
-	// ################################################################################
-
-	class Block4x4EncodingBits_A8
-	{
-	public:
-
-		static const unsigned int BYTES_PER_BLOCK = 8;
-		static const unsigned int SELECTOR_BYTES = 6;
-
-		typedef struct
-		{
-			unsigned base : 8;
-			unsigned table : 4;
-			unsigned multiplier : 4;
-			unsigned selectors0 : 8;
-			unsigned selectors1 : 8;
-			unsigned selectors2 : 8;
-			unsigned selectors3 : 8;
-			unsigned selectors4 : 8;
-			unsigned selectors5 : 8;
-		} Data;
-
-		Data data;
-
-	};
-
-	// ################################################################################
-	// Block4x4EncodingBits_R11
-	// Encoding bits for the R portion of R11
-	// ################################################################################
-
-	class Block4x4EncodingBits_R11
-	{
-	public:
-
-		static const unsigned int BYTES_PER_BLOCK = 8;
-		static const unsigned int SELECTOR_BYTES = 6;
-
-		typedef struct
-		{
-			unsigned base : 8;
-			unsigned table : 4;
-			unsigned multiplier : 4;
-			unsigned selectors0 : 8;
-			unsigned selectors1 : 8;
-			unsigned selectors2 : 8;
-			unsigned selectors3 : 8;
-			unsigned selectors4 : 8;
-			unsigned selectors5 : 8;
-		} Data;
-
-		Data data;
-
-	};
-
-	class Block4x4EncodingBits_RG11
-	{
-	public:
-
-		static const unsigned int BYTES_PER_BLOCK = 16;
-		static const unsigned int SELECTOR_BYTES = 12;
-
-		typedef struct
-		{
-			//Red portion
-			unsigned baseR : 8;
-			unsigned tableIndexR : 4;
-			unsigned multiplierR : 4;
-			unsigned selectorsR0 : 8;
-			unsigned selectorsR1 : 8;
-			unsigned selectorsR2 : 8;
-			unsigned selectorsR3 : 8;
-			unsigned selectorsR4 : 8;
-			unsigned selectorsR5 : 8;
-			//Green portion
-			unsigned baseG : 8;
-			unsigned tableIndexG : 4;
-			unsigned multiplierG : 4;
-			unsigned selectorsG0 : 8;
-			unsigned selectorsG1 : 8;
-			unsigned selectorsG2 : 8;
-			unsigned selectorsG3 : 8;
-			unsigned selectorsG4 : 8;
-			unsigned selectorsG5 : 8;
-		} Data;
-
-		Data data;
-
-	};
-
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.cpp
deleted file mode 100644
index a27f74c0d5..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.cpp
+++ /dev/null
@@ -1,1281 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_ETC1.cpp
-
-Block4x4Encoding_ETC1 is the encoder to use when targetting file format ETC1.  This encoder is also
-used for the ETC1 subset of file format RGB8, RGBA8 and RGB8A1
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_ETC1.h"
-
-#include "EtcBlock4x4.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcDifferentialTrys.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-
-	// pixel processing order if the flip bit = 0 (horizontal split)
-	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip0[PIXELS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-
-	// pixel processing order if the flip bit = 1 (vertical split)
-	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip1[PIXELS] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
-
-	// pixel processing order for horizontal scan (ETC normally does a vertical scan)
-	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
-
-	// pixel indices for different block halves
-	const unsigned int Block4x4Encoding_ETC1::s_auiLeftPixelMapping[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-	const unsigned int Block4x4Encoding_ETC1::s_auiRightPixelMapping[8] = { 8, 9, 10, 11, 12, 13, 14, 15 };
-	const unsigned int Block4x4Encoding_ETC1::s_auiTopPixelMapping[8] = { 0, 1, 4, 5, 8, 9, 12, 13 };
-	const unsigned int Block4x4Encoding_ETC1::s_auiBottomPixelMapping[8] = { 2, 3, 6, 7, 10, 11, 14, 15 };
-
-	// CW ranges that the ETC1 decoders use
-	// CW is basically a contrast for the different selector bits, since these values are offsets to the base color
-	// the first axis in the array is indexed by the CW in the encoding bits
-	// the second axis in the array is indexed by the selector bits
-	float Block4x4Encoding_ETC1::s_aafCwTable[CW_RANGES][SELECTORS] =
-	{
-		{ 2.0f / 255.0f, 8.0f / 255.0f, -2.0f / 255.0f, -8.0f / 255.0f },
-		{ 5.0f / 255.0f, 17.0f / 255.0f, -5.0f / 255.0f, -17.0f / 255.0f },
-		{ 9.0f / 255.0f, 29.0f / 255.0f, -9.0f / 255.0f, -29.0f / 255.0f },
-		{ 13.0f / 255.0f, 42.0f / 255.0f, -13.0f / 255.0f, -42.0f / 255.0f },
-		{ 18.0f / 255.0f, 60.0f / 255.0f, -18.0f / 255.0f, -60.0f / 255.0f },
-		{ 24.0f / 255.0f, 80.0f / 255.0f, -24.0f / 255.0f, -80.0f / 255.0f },
-		{ 33.0f / 255.0f, 106.0f / 255.0f, -33.0f / 255.0f, -106.0f / 255.0f },
-		{ 47.0f / 255.0f, 183.0f / 255.0f, -47.0f / 255.0f, -183.0f / 255.0f }
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_ETC1::Block4x4Encoding_ETC1(void)
-	{
-		m_mode = MODE_ETC1;
-		m_boolDiff = false;
-		m_boolFlip = false;
-		m_frgbaColor1 = ColorFloatRGBA();
-		m_frgbaColor2 = ColorFloatRGBA();
-		m_uiCW1 = 0;
-		m_uiCW2 = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_auiSelectors[uiPixel] = 0;
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-		}
-
-		m_boolMostLikelyFlip = false;
-
-		m_fError = -1.0f;
-
-		m_fError1 = -1.0f;
-		m_fError2 = -1.0f;
-		m_boolSeverelyBentDifferentialColors = false;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-		}
-
-	}
-
-	 Block4x4Encoding_ETC1::~Block4x4Encoding_ETC1(void) {}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_ETC1::InitFromSource(Block4x4 *a_pblockParent,
-												ColorFloatRGBA *a_pafrgbaSource,
-												unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
-	{
-
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-		}
-
-		m_fError = -1.0f;
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_ETC1::InitFromEncodingBits(Block4x4 *a_pblockParent,
-														unsigned char *a_paucEncodingBits,
-														ColorFloatRGBA *a_pafrgbaSource, 
-														ErrorMetric a_errormetric)
-	{
-
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-		m_fError = -1.0f;
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
-
-		m_mode = MODE_ETC1;
-		m_boolDiff = m_pencodingbitsRGB8->individual.diff;
-		m_boolFlip = m_pencodingbitsRGB8->individual.flip;
-		if (m_boolDiff)
-		{
-			int iR2 = (int)(m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2);
-			if (iR2 < 0)
-			{
-				iR2 = 0;
-			}
-			else if (iR2 > 31)
-			{
-				iR2 = 31;
-			}
-
-			int iG2 = (int)(m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2);
-			if (iG2 < 0)
-			{
-				iG2 = 0;
-			}
-			else if (iG2 > 31)
-			{
-				iG2 = 31;
-			}
-
-			int iB2 = (int)(m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2);
-			if (iB2 < 0)
-			{
-				iB2 = 0;
-			}
-			else if (iB2 > 31)
-			{
-				iB2 = 31;
-			}
-
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, (unsigned char)iB2);
-
-		}
-		else
-		{
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red1, m_pencodingbitsRGB8->individual.green1, m_pencodingbitsRGB8->individual.blue1);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red2, m_pencodingbitsRGB8->individual.green2, m_pencodingbitsRGB8->individual.blue2);
-		}
-
-		m_uiCW1 = m_pencodingbitsRGB8->individual.cw1;
-		m_uiCW2 = m_pencodingbitsRGB8->individual.cw2;
-
-		InitFromEncodingBits_Selectors();
-
-		Decode();
-
-		CalcBlockError();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// init the selectors from a prior encoding
-	//
-	void Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(void)
-	{
-
-		unsigned char *paucSelectors = (unsigned char *)&m_pencodingbitsRGB8->individual.selectors;
-
-		for (unsigned int iPixel = 0; iPixel < PIXELS; iPixel++)
-		{
-			unsigned int uiByteMSB = (unsigned int)(1 - (iPixel / 8));
-			unsigned int uiByteLSB = (unsigned int)(3 - (iPixel / 8));
-			unsigned int uiShift = (unsigned int)(iPixel & 7);
-
-			unsigned int uiSelectorMSB = (unsigned int)((paucSelectors[uiByteMSB] >> uiShift) & 1);
-			unsigned int uiSelectorLSB = (unsigned int)((paucSelectors[uiByteLSB] >> uiShift) & 1);
-
-			m_auiSelectors[iPixel] = (uiSelectorMSB << 1) + uiSelectorLSB;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_ETC1::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			PerformFirstIteration();
-			break;
-
-		case 1:
-			TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 2:
-			TryIndividual(m_boolMostLikelyFlip, 1);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 3:
-			TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
-			if (a_fEffort <= 59.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 4:
-			TryIndividual(!m_boolMostLikelyFlip, 1);
-			if (a_fEffort <= 69.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 5:
-			TryDegenerates1();
-			if (a_fEffort <= 79.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 6:
-			TryDegenerates2();
-			if (a_fEffort <= 89.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 7:
-			TryDegenerates3();
-			if (a_fEffort <= 99.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 8:
-			TryDegenerates4();
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best initial encoding to ensure block has a valid encoding
-	//
-	void Block4x4Encoding_ETC1::PerformFirstIteration(void)
-	{
-		CalculateMostLikelyFlip();
-
-		m_fError = FLT_MAX;
-
-		TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-
-		TryIndividual(m_boolMostLikelyFlip, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		TryIndividual(!m_boolMostLikelyFlip, 0);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// algorithm:
-	// create a source average color for the Left, Right, Top and Bottom halves using the 8 pixels in each half
-	// note: the "gray line" is the line of equal delta RGB that goes thru the average color
-	// for each half:
-	//		see how close each of the 8 pixels are to the "gray line" that goes thru the source average color
-	//		create an error value that is the sum of the distances from the gray line
-	// h_error is the sum of Left and Right errors
-	// v_error is the sum of Top and Bottom errors
-	//
-	void Block4x4Encoding_ETC1::CalculateMostLikelyFlip(void)
-	{
-		static const bool DEBUG_PRINT = false;
-
-		CalculateSourceAverages();
-
-		float fLeftGrayErrorSum = 0.0f;
-		float fRightGrayErrorSum = 0.0f;
-		float fTopGrayErrorSum = 0.0f;
-		float fBottomGrayErrorSum = 0.0f;
-
-		for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-		{
-			ColorFloatRGBA *pfrgbaLeft = &m_pafrgbaSource[uiPixel];
-			ColorFloatRGBA *pfrgbaRight = &m_pafrgbaSource[uiPixel + 8];
-			ColorFloatRGBA *pfrgbaTop = &m_pafrgbaSource[s_auiTopPixelMapping[uiPixel]];
-			ColorFloatRGBA *pfrgbaBottom = &m_pafrgbaSource[s_auiBottomPixelMapping[uiPixel]];
-
-			float fLeftGrayError = CalcGrayDistance2(*pfrgbaLeft, m_frgbaSourceAverageLeft);
-			float fRightGrayError = CalcGrayDistance2(*pfrgbaRight, m_frgbaSourceAverageRight);
-			float fTopGrayError = CalcGrayDistance2(*pfrgbaTop, m_frgbaSourceAverageTop);
-			float fBottomGrayError = CalcGrayDistance2(*pfrgbaBottom, m_frgbaSourceAverageBottom);
-
-			fLeftGrayErrorSum += fLeftGrayError;
-			fRightGrayErrorSum += fRightGrayError;
-			fTopGrayErrorSum += fTopGrayError;
-			fBottomGrayErrorSum += fBottomGrayError;
-		}
-
-		if (DEBUG_PRINT)
-		{
-			printf("\n%.2f %.2f\n", fLeftGrayErrorSum + fRightGrayErrorSum, fTopGrayErrorSum + fBottomGrayErrorSum);
-		}
-
-		m_boolMostLikelyFlip = (fTopGrayErrorSum + fBottomGrayErrorSum) < (fLeftGrayErrorSum + fRightGrayErrorSum);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate source pixel averages for each 2x2 quadrant in a 4x4 block
-	// these are used to determine the averages for each of the 4 different halves (left, right, top, bottom)
-	// ignore pixels that have alpha == NAN (these are border pixels outside of the source image)
-	// weight the averages based on a pixel's alpha
-	//
-	void Block4x4Encoding_ETC1::CalculateSourceAverages(void)
-	{
-		static const bool DEBUG_PRINT = false;
-
-		bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == ErrorMetric::RGBX;
-
-		if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE || boolRGBX)
-		{
-			ColorFloatRGBA frgbaSumUL = m_pafrgbaSource[0] + m_pafrgbaSource[1] + m_pafrgbaSource[4] + m_pafrgbaSource[5];
-			ColorFloatRGBA frgbaSumLL = m_pafrgbaSource[2] + m_pafrgbaSource[3] + m_pafrgbaSource[6] + m_pafrgbaSource[7];
-			ColorFloatRGBA frgbaSumUR = m_pafrgbaSource[8] + m_pafrgbaSource[9] + m_pafrgbaSource[12] + m_pafrgbaSource[13];
-			ColorFloatRGBA frgbaSumLR = m_pafrgbaSource[10] + m_pafrgbaSource[11] + m_pafrgbaSource[14] + m_pafrgbaSource[15];
-
-			m_frgbaSourceAverageLeft = (frgbaSumUL + frgbaSumLL) * 0.125f;
-			m_frgbaSourceAverageRight = (frgbaSumUR + frgbaSumLR) * 0.125f;
-			m_frgbaSourceAverageTop = (frgbaSumUL + frgbaSumUR) * 0.125f;
-			m_frgbaSourceAverageBottom = (frgbaSumLL + frgbaSumLR) * 0.125f;
-		}
-		else
-		{
-			float afSourceAlpha[PIXELS];
-
-			// treat alpha NAN as 0.0f
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				afSourceAlpha[uiPixel] = isnan(m_pafrgbaSource[uiPixel].fA) ? 
-																		0.0f : 
-																		m_pafrgbaSource[uiPixel].fA;
-			}
-
-			ColorFloatRGBA afrgbaAlphaWeightedSource[PIXELS];
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				afrgbaAlphaWeightedSource[uiPixel] = m_pafrgbaSource[uiPixel] * afSourceAlpha[uiPixel];
-			}
-
-			ColorFloatRGBA frgbaSumUL = afrgbaAlphaWeightedSource[0] +
-										afrgbaAlphaWeightedSource[1] +
-										afrgbaAlphaWeightedSource[4] +
-										afrgbaAlphaWeightedSource[5];
-
-			ColorFloatRGBA frgbaSumLL = afrgbaAlphaWeightedSource[2] +
-										afrgbaAlphaWeightedSource[3] +
-										afrgbaAlphaWeightedSource[6] +
-										afrgbaAlphaWeightedSource[7];
-
-			ColorFloatRGBA frgbaSumUR = afrgbaAlphaWeightedSource[8] +
-										afrgbaAlphaWeightedSource[9] +
-										afrgbaAlphaWeightedSource[12] +
-										afrgbaAlphaWeightedSource[13];
-
-			ColorFloatRGBA frgbaSumLR = afrgbaAlphaWeightedSource[10] +
-										afrgbaAlphaWeightedSource[11] +
-										afrgbaAlphaWeightedSource[14] +
-										afrgbaAlphaWeightedSource[15];
-
-			float fWeightSumUL = afSourceAlpha[0] +
-									afSourceAlpha[1] +
-									afSourceAlpha[4] +
-									afSourceAlpha[5];
-
-			float fWeightSumLL = afSourceAlpha[2] +
-									afSourceAlpha[3] +
-									afSourceAlpha[6] +
-									afSourceAlpha[7];
-
-			float fWeightSumUR = afSourceAlpha[8] +
-									afSourceAlpha[9] +
-									afSourceAlpha[12] +
-									afSourceAlpha[13];
-
-			float fWeightSumLR = afSourceAlpha[10] +
-									afSourceAlpha[11] +
-									afSourceAlpha[14] +
-									afSourceAlpha[15];
-
-			ColorFloatRGBA frgbaSumLeft = frgbaSumUL + frgbaSumLL;
-			ColorFloatRGBA frgbaSumRight = frgbaSumUR + frgbaSumLR;
-			ColorFloatRGBA frgbaSumTop = frgbaSumUL + frgbaSumUR;
-			ColorFloatRGBA frgbaSumBottom = frgbaSumLL + frgbaSumLR;
-
-			float fWeightSumLeft = fWeightSumUL + fWeightSumLL;
-			float fWeightSumRight = fWeightSumUR + fWeightSumLR;
-			float fWeightSumTop = fWeightSumUL + fWeightSumUR;
-			float fWeightSumBottom = fWeightSumLL + fWeightSumLR;
-
-			// check to see if there is at least 1 pixel with  non-zero alpha
-			// completely transparent block should not make it to this code
-			assert((fWeightSumLeft + fWeightSumRight) > 0.0f);
-			assert((fWeightSumTop + fWeightSumBottom) > 0.0f);
-
-			if (fWeightSumLeft > 0.0f)
-			{
-				m_frgbaSourceAverageLeft = frgbaSumLeft * (1.0f/fWeightSumLeft);
-			}
-			if (fWeightSumRight > 0.0f)
-			{
-				m_frgbaSourceAverageRight = frgbaSumRight * (1.0f/fWeightSumRight);
-			}
-			if (fWeightSumTop > 0.0f)
-			{
-				m_frgbaSourceAverageTop = frgbaSumTop * (1.0f/fWeightSumTop);
-			}
-			if (fWeightSumBottom > 0.0f)
-			{
-				m_frgbaSourceAverageBottom = frgbaSumBottom * (1.0f/fWeightSumBottom);
-			}
-
-			if (fWeightSumLeft == 0.0f)
-			{
-				assert(fWeightSumRight > 0.0f);
-				m_frgbaSourceAverageLeft = m_frgbaSourceAverageRight;
-			}
-			if (fWeightSumRight == 0.0f)
-			{
-				assert(fWeightSumLeft > 0.0f);
-				m_frgbaSourceAverageRight = m_frgbaSourceAverageLeft;
-			}
-			if (fWeightSumTop == 0.0f)
-			{
-				assert(fWeightSumBottom > 0.0f);
-				m_frgbaSourceAverageTop = m_frgbaSourceAverageBottom;
-			}
-			if (fWeightSumBottom == 0.0f)
-			{
-				assert(fWeightSumTop > 0.0f);
-				m_frgbaSourceAverageBottom = m_frgbaSourceAverageTop;
-			}
-		}
-
-		
-
-		if (DEBUG_PRINT)
-		{
-			printf("\ntarget: [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f]\n",
-				m_frgbaSourceAverageLeft.fR, m_frgbaSourceAverageLeft.fG, m_frgbaSourceAverageLeft.fB,
-				m_frgbaSourceAverageRight.fR, m_frgbaSourceAverageRight.fG, m_frgbaSourceAverageRight.fB,
-				m_frgbaSourceAverageTop.fR, m_frgbaSourceAverageTop.fG, m_frgbaSourceAverageTop.fB,
-				m_frgbaSourceAverageBottom.fR, m_frgbaSourceAverageBottom.fG, m_frgbaSourceAverageBottom.fB);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try an ETC1 differential mode encoding
-	// use a_boolFlip to set the encoding F bit
-	// use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius]
-	// use a_iGrayOffset1 and a_iGrayOffset2 to offset the basecolor to search for degenerate encodings
-	// replace the encoding if the encoding error is less than previous encoding
-	//
-	void Block4x4Encoding_ETC1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
-												int a_iGrayOffset1, int a_iGrayOffset2)
-	{
-
-		ColorFloatRGBA frgbaColor1;
-		ColorFloatRGBA frgbaColor2;
-
-		const unsigned int *pauiPixelMapping1;
-		const unsigned int *pauiPixelMapping2;
-
-		if (a_boolFlip)
-		{
-			frgbaColor1 = m_frgbaSourceAverageTop;
-			frgbaColor2 = m_frgbaSourceAverageBottom;
-
-			pauiPixelMapping1 = s_auiTopPixelMapping;
-			pauiPixelMapping2 = s_auiBottomPixelMapping;
-		}
-		else
-		{
-			frgbaColor1 = m_frgbaSourceAverageLeft;
-			frgbaColor2 = m_frgbaSourceAverageRight;
-
-			pauiPixelMapping1 = s_auiLeftPixelMapping;
-			pauiPixelMapping2 = s_auiRightPixelMapping;
-		}
-
-		DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, 
-								a_uiRadius, a_iGrayOffset1, a_iGrayOffset2);
-
-		Block4x4Encoding_ETC1 encodingTry = *this;
-		encodingTry.m_boolFlip = a_boolFlip;
-
-		encodingTry.TryDifferentialHalf(&trys.m_half1);
-		encodingTry.TryDifferentialHalf(&trys.m_half2);
-
-		// find best halves that are within differential range
-		DifferentialTrys::Try *ptryBest1 = nullptr;
-		DifferentialTrys::Try *ptryBest2 = nullptr;
-		encodingTry.m_fError = FLT_MAX;
-
-		// see if the best of each half are in differential range
-		int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed;
-		int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen;
-		int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue;
-		if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3)
-		{
-			ptryBest1 = trys.m_half1.m_ptryBest;
-			ptryBest2 = trys.m_half2.m_ptryBest;
-			encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
-		}
-		else
-		{
-			// else, find the next best halves that are in differential range
-			for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0];
-			ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys];
-				ptry1++)
-			{
-				for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0];
-				ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys];
-					ptry2++)
-				{
-					iDRed = ptry2->m_iRed - ptry1->m_iRed;
-					bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4;
-					iDGreen = ptry2->m_iGreen - ptry1->m_iGreen;
-					bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4;
-					iDBlue = ptry2->m_iBlue - ptry1->m_iBlue;
-					bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4;
-
-					if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta)
-					{
-						float fError = ptry1->m_fError + ptry2->m_fError;
-
-						if (fError < encodingTry.m_fError)
-						{
-							encodingTry.m_fError = fError;
-
-							ptryBest1 = ptry1;
-							ptryBest2 = ptry2;
-						}
-					}
-
-				}
-			}
-			assert(encodingTry.m_fError < FLT_MAX);
-			assert(ptryBest1 != nullptr);
-			assert(ptryBest2 != nullptr);
-		}
-
-		if (encodingTry.m_fError < m_fError)
-		{
-			m_mode = MODE_ETC1;
-			m_boolDiff = true;
-			m_boolFlip = encodingTry.m_boolFlip;
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
-			m_uiCW1 = ptryBest1->m_uiCW;
-			m_uiCW2 = ptryBest2->m_uiCW;
-
-			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)
-			{
-				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
-				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
-
-				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
-				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				m_auiSelectors[uiPixel1] = uiSelector1;
-				m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1];
-				float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2];
-
-				m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
-				m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
-			}
-
-			m_fError1 = ptryBest1->m_fError;
-			m_fError2 = ptryBest2->m_fError;
-			m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors;
-			m_fError = m_fError1 + m_fError2;
-
-			// sanity check
-			{
-				int iRed1 = m_frgbaColor1.IntRed(31.0f);
-				int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
-				int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
-
-				int iRed2 = m_frgbaColor2.IntRed(31.0f);
-				int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
-				int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
-
-				iDRed = iRed2 - iRed1;
-				iDGreen = iGreen2 - iGreen1;
-				iDBlue = iBlue2 - iBlue1;
-
-				assert(iDRed >= -4 && iDRed < 4);
-				assert(iDGreen >= -4 && iDGreen < 4);
-				assert(iDBlue >= -4 && iDBlue < 4);
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try an ETC1 differential mode encoding for a half of a 4x4 block
-	// vary the basecolor components using a radius
-	//
-	void Block4x4Encoding_ETC1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf)
-	{
-
-		a_phalf->m_ptryBest = nullptr;
-		float fBestTryError = FLT_MAX;
-
-		a_phalf->m_uiTrys = 0;
-		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius; 
-				iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
-				iRed++)
-		{
-			assert(iRed >= 0 && iRed <= 31);
-
-			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
-					iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
-					iGreen++)
-			{
-				assert(iGreen >= 0 && iGreen <= 31);
-
-				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
-						iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
-						iBlue++)
-				{
-					assert(iBlue >= 0 && iBlue <= 31);
-
-					DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
-					assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]);
-
-					ptry->m_iRed = iRed;
-					ptry->m_iGreen = iGreen;
-					ptry->m_iBlue = iBlue;
-					ptry->m_fError = FLT_MAX;
-					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
-
-					// try each CW
-					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
-					{
-						unsigned int auiPixelSelectors[PIXELS / 2];
-						ColorFloatRGBA	afrgbaDecodedPixels[PIXELS / 2];
-						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, 
-															FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-
-						// pre-compute decoded pixels for each selector
-						ColorFloatRGBA afrgbaSelectors[SELECTORS];
-						assert(SELECTORS == 4);
-						afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB();
-						afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB();
-						afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB();
-						afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB();
-
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
-							ColorFloatRGBA frgbaDecodedPixel;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
-
-								float fPixelError;
-
-								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
-																	*pfrgbaSourcePixel);
-
-								if (fPixelError < afPixelErrors[uiPixel])
-								{
-									auiPixelSelectors[uiPixel] = uiSelector;
-									afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel;
-									afPixelErrors[uiPixel] = fPixelError;
-								}
-
-							}
-						}
-
-						// add up all pixel errors
-						float fCWError = 0.0f;
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{	
-							fCWError += afPixelErrors[uiPixel];
-						}
-
-						// if best CW so far
-						if (fCWError < ptry->m_fError)
-						{
-							ptry->m_uiCW = uiCW;
-							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-							{
-								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
-							}
-							ptry->m_fError = fCWError;
-						}
-
-					}
-
-					if (ptry->m_fError < fBestTryError)
-					{
-						a_phalf->m_ptryBest = ptry;
-						fBestTryError = ptry->m_fError;
-					}
-
-					assert(ptry->m_fError < FLT_MAX);
-
-					a_phalf->m_uiTrys++;
-				}
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try an ETC1 individual mode encoding
-	// use a_boolFlip to set the encoding F bit
-	// use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius]
-	// replace the encoding if the encoding error is less than previous encoding
-	//
-	void Block4x4Encoding_ETC1::TryIndividual(bool a_boolFlip, unsigned int a_uiRadius)
-	{
-
-		ColorFloatRGBA frgbaColor1;
-		ColorFloatRGBA frgbaColor2;
-
-		const unsigned int *pauiPixelMapping1;
-		const unsigned int *pauiPixelMapping2;
-
-		if (a_boolFlip)
-		{
-			frgbaColor1 = m_frgbaSourceAverageTop;
-			frgbaColor2 = m_frgbaSourceAverageBottom;
-
-			pauiPixelMapping1 = s_auiTopPixelMapping;
-			pauiPixelMapping2 = s_auiBottomPixelMapping;
-		}
-		else
-		{
-			frgbaColor1 = m_frgbaSourceAverageLeft;
-			frgbaColor2 = m_frgbaSourceAverageRight;
-
-			pauiPixelMapping1 = s_auiLeftPixelMapping;
-			pauiPixelMapping2 = s_auiRightPixelMapping;
-		}
-
-		IndividualTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, a_uiRadius);
-
-		Block4x4Encoding_ETC1 encodingTry = *this;
-		encodingTry.m_boolFlip = a_boolFlip;
-
-		encodingTry.TryIndividualHalf(&trys.m_half1);
-		encodingTry.TryIndividualHalf(&trys.m_half2);
-
-		// use the best of each half
-		IndividualTrys::Try *ptryBest1 = trys.m_half1.m_ptryBest;
-		IndividualTrys::Try *ptryBest2 = trys.m_half2.m_ptryBest;
-		encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
-
-		if (encodingTry.m_fError < m_fError)
-		{
-			m_mode = MODE_ETC1;
-			m_boolDiff = false;
-			m_boolFlip = encodingTry.m_boolFlip;
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
-			m_uiCW1 = ptryBest1->m_uiCW;
-			m_uiCW2 = ptryBest2->m_uiCW;
-
-			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)
-			{
-				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
-				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
-
-				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
-				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				m_auiSelectors[uiPixel1] = uiSelector1;
-				m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1];
-				float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2];
-
-				m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
-				m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
-			}
-
-			m_fError1 = ptryBest1->m_fError;
-			m_fError2 = ptryBest2->m_fError;
-			m_fError = m_fError1 + m_fError2;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try an ETC1 differential mode encoding for a half of a 4x4 block
-	// vary the basecolor components using a radius
-	//
-	void Block4x4Encoding_ETC1::TryIndividualHalf(IndividualTrys::Half *a_phalf)
-	{
-
-		a_phalf->m_ptryBest = nullptr;
-		float fBestTryError = FLT_MAX;
-
-		a_phalf->m_uiTrys = 0;
-		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius;
-			iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
-			iRed++)
-		{
-			assert(iRed >= 0 && iRed <= 15);
-
-			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
-				iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
-				iGreen++)
-			{
-				assert(iGreen >= 0 && iGreen <= 15);
-
-				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
-					iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
-					iBlue++)
-				{
-					assert(iBlue >= 0 && iBlue <= 15);
-
-					IndividualTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
-					assert(ptry < &a_phalf->m_atry[IndividualTrys::Half::MAX_TRYS]);
-
-					ptry->m_iRed = iRed;
-					ptry->m_iGreen = iGreen;
-					ptry->m_iBlue = iBlue;
-					ptry->m_fError = FLT_MAX;
-					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
-
-					// try each CW
-					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
-					{
-						unsigned int auiPixelSelectors[PIXELS / 2];
-						ColorFloatRGBA	afrgbaDecodedPixels[PIXELS / 2];
-						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-															FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-
-						// pre-compute decoded pixels for each selector
-						ColorFloatRGBA afrgbaSelectors[SELECTORS];
-						assert(SELECTORS == 4);
-						afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB();
-						afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB();
-						afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB();
-						afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB();
-
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
-							ColorFloatRGBA frgbaDecodedPixel;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
-
-								float fPixelError;
-
-								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
-										*pfrgbaSourcePixel);
-
-								if (fPixelError < afPixelErrors[uiPixel])
-								{
-									auiPixelSelectors[uiPixel] = uiSelector;
-									afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel;
-									afPixelErrors[uiPixel] = fPixelError;
-								}
-
-							}
-						}
-
-						// add up all pixel errors
-						float fCWError = 0.0f;
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							fCWError += afPixelErrors[uiPixel];
-						}
-
-						// if best CW so far
-						if (fCWError < ptry->m_fError)
-						{
-							ptry->m_uiCW = uiCW;
-							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-							{
-								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
-							}
-							ptry->m_fError = fCWError;
-						}
-
-					}
-
-					if (ptry->m_fError < fBestTryError)
-					{
-						a_phalf->m_ptryBest = ptry;
-						fBestTryError = ptry->m_fError;
-					}
-
-					assert(ptry->m_fError < FLT_MAX);
-
-					a_phalf->m_uiTrys++;
-				}
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 1 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_ETC1::TryDegenerates1(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, 2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, -2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 2 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_ETC1::TryDegenerates2(void)
-	{
-
-		TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 3 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_ETC1::TryDegenerates3(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, -2);
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, 2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, -2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, 2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 4 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_ETC1::TryDegenerates4(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -4, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 4, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, 4);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, -4);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find the best selector for each pixel based on a particular basecolor and CW that have been previously set
-	// calculate the selectors for each half of the block separately
-	// set the block error as the sum of each half's error
-	//
-	void Block4x4Encoding_ETC1::CalculateSelectors()
-	{
-		if (m_boolFlip)
-		{
-			CalculateHalfOfTheSelectors(0, s_auiTopPixelMapping);
-			CalculateHalfOfTheSelectors(1, s_auiBottomPixelMapping);
-		}
-		else
-		{
-			CalculateHalfOfTheSelectors(0, s_auiLeftPixelMapping);
-			CalculateHalfOfTheSelectors(1, s_auiRightPixelMapping);
-		}
-
-		m_fError = m_fError1 + m_fError2;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// choose best selectors for half of the block
-	// calculate the error for half of the block
-	//
-	void Block4x4Encoding_ETC1::CalculateHalfOfTheSelectors(unsigned int a_uiHalf,
-		const unsigned int *pauiPixelMapping)
-	{
-		static const bool DEBUG_PRINT = false;
-
-		ColorFloatRGBA *pfrgbaColor = a_uiHalf ? &m_frgbaColor2 : &m_frgbaColor1;
-		unsigned int *puiCW = a_uiHalf ? &m_uiCW2 : &m_uiCW1;
-
-		float *pfHalfError = a_uiHalf ? &m_fError2 : &m_fError1;
-		*pfHalfError = FLT_MAX;
-
-		// try each CW
-		for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
-		{
-			if (DEBUG_PRINT)
-			{
-				printf("\ncw=%u\n", uiCW);
-			}
-
-			unsigned int auiPixelSelectors[PIXELS / 2];
-			ColorFloatRGBA	afrgbaDecodedPixels[PIXELS / 2];
-			float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-
-			for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-			{
-				if (DEBUG_PRINT)
-				{
-					printf("\tsource [%.2f,%.2f,%.2f]\n", m_pafrgbaSource[pauiPixelMapping[uiPixel]].fR,
-						m_pafrgbaSource[pauiPixelMapping[uiPixel]].fG, m_pafrgbaSource[pauiPixelMapping[uiPixel]].fB);
-				}
-
-				ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[pauiPixelMapping[uiPixel]];
-				ColorFloatRGBA frgbaDecodedPixel;
-
-				for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-				{
-					float fDeltaRGB = s_aafCwTable[uiCW][uiSelector];
-
-					frgbaDecodedPixel = (*pfrgbaColor + fDeltaRGB).ClampRGB();
-
-					float fPixelError;
-					
-					fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[pauiPixelMapping[uiPixel]],
-														*pfrgbaSourcePixel);
-					
-					if (DEBUG_PRINT)
-					{
-						printf("\tpixel %u, index %u [%.2f,%.2f,%.2f], error %.2f", uiPixel, uiSelector,
-							frgbaDecodedPixel.fR,
-							frgbaDecodedPixel.fG,
-							frgbaDecodedPixel.fB,
-							fPixelError);
-					}
-
-					if (fPixelError < afPixelErrors[uiPixel])
-					{
-						if (DEBUG_PRINT)
-						{
-							printf(" *");
-						}
-
-						auiPixelSelectors[uiPixel] = uiSelector;
-						afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel;
-						afPixelErrors[uiPixel] = fPixelError;
-					}
-
-					if (DEBUG_PRINT)
-					{
-						printf("\n");
-					}
-				}
-			}
-
-			// add up all pixel errors
-			float fCWError = 0.0f;
-			for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-			{
-				fCWError += afPixelErrors[uiPixel];
-			}
-			if (DEBUG_PRINT)
-			{
-				printf("\terror %.2f\n", fCWError);
-			}
-
-			// if best CW so far
-			if (fCWError < *pfHalfError)
-			{
-				*pfHalfError = fCWError;
-				*puiCW = uiCW;
-				for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-				{
-					m_auiSelectors[pauiPixelMapping[uiPixel]] = auiPixelSelectors[uiPixel];
-					m_afrgbaDecodedColors[pauiPixelMapping[uiPixel]] = afrgbaDecodedPixels[uiPixel];
-				}
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_ETC1::SetEncodingBits(void)
-	{
-		assert(m_mode == MODE_ETC1);
-
-		if (m_boolDiff)
-		{
-			int iRed1 = m_frgbaColor1.IntRed(31.0f);
-			int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
-			int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
-
-			int iRed2 = m_frgbaColor2.IntRed(31.0f);
-			int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
-			int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
-
-			int iDRed2 = iRed2 - iRed1;
-			int iDGreen2 = iGreen2 - iGreen1;
-			int iDBlue2 = iBlue2 - iBlue1;
-
-			assert(iDRed2 >= -4 && iDRed2 < 4);
-			assert(iDGreen2 >= -4 && iDGreen2 < 4);
-			assert(iDBlue2 >= -4 && iDBlue2 < 4);
-
-			m_pencodingbitsRGB8->differential.red1 = (unsigned int)iRed1;
-			m_pencodingbitsRGB8->differential.green1 = (unsigned int)iGreen1;
-			m_pencodingbitsRGB8->differential.blue1 = (unsigned int)iBlue1;
-
-			m_pencodingbitsRGB8->differential.dred2 = iDRed2;
-			m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2;
-			m_pencodingbitsRGB8->differential.dblue2 = iDBlue2;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->individual.red1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-			m_pencodingbitsRGB8->individual.green1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-			m_pencodingbitsRGB8->individual.blue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-			m_pencodingbitsRGB8->individual.red2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-			m_pencodingbitsRGB8->individual.green2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-			m_pencodingbitsRGB8->individual.blue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-		}
-
-		m_pencodingbitsRGB8->individual.cw1 = m_uiCW1;
-		m_pencodingbitsRGB8->individual.cw2 = m_uiCW2;
-
-		SetEncodingBits_Selectors();
-
-		m_pencodingbitsRGB8->individual.diff = (unsigned int)m_boolDiff;
-		m_pencodingbitsRGB8->individual.flip = (unsigned int)m_boolFlip;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the selectors in the encoding bits
-	//
-	void Block4x4Encoding_ETC1::SetEncodingBits_Selectors(void)
-	{
-
-		m_pencodingbitsRGB8->individual.selectors = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiSelector = m_auiSelectors[uiPixel];
-
-			// set index msb
-			m_pencodingbitsRGB8->individual.selectors |= (uiSelector >> 1) << (uiPixel ^ 8);
-
-			// set index lsb
-			m_pencodingbitsRGB8->individual.selectors |= (uiSelector & 1) << ((16 + uiPixel) ^ 8);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the decoded colors and decoded alpha based on the encoding state
-	//
-	void Block4x4Encoding_ETC1::Decode(void)
-	{
-
-		const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0;
-
-		for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++)
-		{
-			ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2;
-			unsigned int uiCW = uiPixelOrder < 8 ? m_uiCW1 : m_uiCW2;
-
-			unsigned int uiPixel = pauiPixelOrder[uiPixelOrder];
-
-			float fDelta = s_aafCwTable[uiCW][m_auiSelectors[uiPixel]];
-			m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB();
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.h
deleted file mode 100644
index c0dc84d5d5..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcDifferentialTrys.h"
-#include "EtcIndividualTrys.h"
-
-namespace Etc
-{
-
-	// base class for Block4x4Encoding_RGB8
-	class Block4x4Encoding_ETC1 : public Block4x4Encoding
-	{
-	public:
-
-		Block4x4Encoding_ETC1(void);
-		virtual ~Block4x4Encoding_ETC1(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-									ColorFloatRGBA *a_pafrgbaSource,
-
-									unsigned char *a_paucEncodingBits,
-									ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource, 
-
-											ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		inline virtual bool GetFlip(void)
-		{
-			return m_boolFlip;
-		}
-
-		inline virtual bool IsDifferential(void)
-		{
-			return m_boolDiff;
-		}
-
-		virtual void SetEncodingBits(void);
-
-		void Decode(void);
-
-		inline ColorFloatRGBA GetColor1(void) const
-		{
-			return m_frgbaColor1;
-		}
-
-		inline ColorFloatRGBA GetColor2(void) const
-		{
-			return m_frgbaColor2;
-		}
-
-		inline const unsigned int * GetSelectors(void) const
-		{
-			return m_auiSelectors;
-		}
-
-		inline unsigned int GetCW1(void) const
-		{
-			return m_uiCW1;
-		}
-
-		inline unsigned int GetCW2(void) const
-		{
-			return m_uiCW2;
-		}
-
-		inline bool HasSeverelyBentDifferentialColors(void) const
-		{
-			return m_boolSeverelyBentDifferentialColors;
-		}
-
-	protected:
-
-		static const unsigned int s_auiPixelOrderFlip0[PIXELS];
-		static const unsigned int s_auiPixelOrderFlip1[PIXELS];
-		static const unsigned int s_auiPixelOrderHScan[PIXELS];
-
-		static const unsigned int s_auiLeftPixelMapping[8];
-		static const unsigned int s_auiRightPixelMapping[8];
-		static const unsigned int s_auiTopPixelMapping[8];
-		static const unsigned int s_auiBottomPixelMapping[8];
-
-		static const unsigned int SELECTOR_BITS = 2;
-		static const unsigned int SELECTORS = 1 << SELECTOR_BITS;
-
-		static const unsigned int CW_BITS = 3;
-		static const unsigned int CW_RANGES = 1 << CW_BITS;
-
-		static float s_aafCwTable[CW_RANGES][SELECTORS];
-		static unsigned char s_aucDifferentialCwRange[256];
-
-		static const int MAX_DIFFERENTIAL = 3;
-		static const int MIN_DIFFERENTIAL = -4;
-
-		void InitFromEncodingBits_Selectors(void);
-
-		void PerformFirstIteration(void);
-		void CalculateMostLikelyFlip(void);
-
-		void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
-								int a_iGrayOffset1, int a_iGrayOffset2);
-		void TryDifferentialHalf(DifferentialTrys::Half *a_phalf);
-
-		void TryIndividual(bool a_boolFlip, unsigned int a_uiRadius);
-		void TryIndividualHalf(IndividualTrys::Half *a_phalf);
-
-		void TryDegenerates1(void);
-		void TryDegenerates2(void);
-		void TryDegenerates3(void);
-		void TryDegenerates4(void);
-
-		void CalculateSelectors();
-		void CalculateHalfOfTheSelectors(unsigned int a_uiHalf,
-											const unsigned int *pauiPixelMapping);
-
-		// calculate the distance2 of r_frgbaPixel from r_frgbaTarget's gray line
-		inline float CalcGrayDistance2(ColorFloatRGBA &r_frgbaPixel, 
-										ColorFloatRGBA &r_frgbaTarget)
-		{
-			float fDeltaGray = ((r_frgbaPixel.fR - r_frgbaTarget.fR) +
-								(r_frgbaPixel.fG - r_frgbaTarget.fG) +
-								(r_frgbaPixel.fB - r_frgbaTarget.fB)) / 3.0f;
-
-			ColorFloatRGBA frgbaPointOnGrayLine = (r_frgbaTarget + fDeltaGray).ClampRGB();
-
-			float fDR = r_frgbaPixel.fR - frgbaPointOnGrayLine.fR;
-			float fDG = r_frgbaPixel.fG - frgbaPointOnGrayLine.fG;
-			float fDB = r_frgbaPixel.fB - frgbaPointOnGrayLine.fB;
-
-			return (fDR*fDR) + (fDG*fDG) + (fDB*fDB);
-		}
-
-		void SetEncodingBits_Selectors(void);
-
-		// intermediate encoding
-		bool			m_boolDiff;
-		bool			m_boolFlip;
-		ColorFloatRGBA	m_frgbaColor1;
-		ColorFloatRGBA	m_frgbaColor2;
-		unsigned int	m_uiCW1;
-		unsigned int	m_uiCW2;
-		unsigned int	m_auiSelectors[PIXELS];
-
-		// state shared between iterations
-		ColorFloatRGBA	m_frgbaSourceAverageLeft;
-		ColorFloatRGBA	m_frgbaSourceAverageRight;
-		ColorFloatRGBA	m_frgbaSourceAverageTop;
-		ColorFloatRGBA	m_frgbaSourceAverageBottom;
-		bool			m_boolMostLikelyFlip;
-
-		// stats
-		float			m_fError1;	// error for Etc1 half 1
-		float			m_fError2;	// error for Etc1 half 2
-		bool			m_boolSeverelyBentDifferentialColors;	// only valid if m_boolDiff;
-
-		// final encoding
-		Block4x4EncodingBits_RGB8 *m_pencodingbitsRGB8;		// or RGB8 portion of Block4x4EncodingBits_RGB8A8
-
-		private:
-
-		void CalculateSourceAverages(void);
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.cpp
deleted file mode 100644
index 4c012fbbf1..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.cpp
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_R11.cpp
-
-Block4x4Encoding_R11 is the encoder to use when targetting file format R11 and SR11 (signed R11).  
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_R11.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-
-	// modifier values to use for R11, SR11, RG11 and SRG11
-	float Block4x4Encoding_R11::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS]
-	{
-		{ -3.0f / 255.0f, -6.0f / 255.0f,  -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f },
-		{ -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 12.0f / 255.0f },
-		{ -2.0f / 255.0f, -4.0f / 255.0f,  -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f },
-
-		{ -3.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f },
-		{ -3.0f / 255.0f, -7.0f / 255.0f,  -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f },
-		{ -4.0f / 255.0f, -7.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
-		{ -3.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
-
-		{ -2.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -4.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
-
-		{ -3.0f / 255.0f, -4.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
-		{ -1.0f / 255.0f, -2.0f / 255.0f,  -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f,  9.0f / 255.0f },
-		{ -4.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f,  -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  8.0f / 255.0f },
-		{ -3.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f,  -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  8.0f / 255.0f }
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_R11::Block4x4Encoding_R11(void)
-	{
-
-		m_pencodingbitsR11 = nullptr;
-
-	}
-
-	Block4x4Encoding_R11::~Block4x4Encoding_R11(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_R11::InitFromSource(Block4x4 *a_pblockParent,
-		ColorFloatRGBA *a_pafrgbaSource,
-		unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
-	{
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-
-		m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)a_paucEncodingBits;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_R11::InitFromEncodingBits(Block4x4 *a_pblockParent,
-		unsigned char *a_paucEncodingBits,
-		ColorFloatRGBA *a_pafrgbaSource,
-		ErrorMetric a_errormetric)
-	{
-		m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)a_paucEncodingBits;
-
-		// init RGB portion
-		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
-			(unsigned char *)m_pencodingbitsR11,
-			a_pafrgbaSource,
-			a_errormetric);
-
-		// init R11 portion
-		{
-			m_mode = MODE_R11;
-			if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-			{
-				m_fRedBase = (float)(signed char)m_pencodingbitsR11->data.base;
-			}
-			else
-			{
-				m_fRedBase = (float)(unsigned char)m_pencodingbitsR11->data.base;
-			}
-			m_fRedMultiplier = (float)m_pencodingbitsR11->data.multiplier;
-			m_uiRedModifierTableIndex = m_pencodingbitsR11->data.table;
-
-			unsigned long long int ulliSelectorBits = 0;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors0 << 40;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors1 << 32;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors2 << 24;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors3 << 16;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors4 << 8;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors5;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				unsigned int uiShift = 45 - (3 * uiPixel);
-				m_auiRedSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (SELECTORS - 1);
-			}
-
-			// decode the red channel
-			// calc red error
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				float fDecodedPixelData = 0.0f;
-				if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || a_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-				{
-					fDecodedPixelData = DecodePixelRed(m_fRedBase, m_fRedMultiplier,
-						m_uiRedModifierTableIndex,
-						m_auiRedSelectors[uiPixel]);
-				}
-				else if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-				{
-					fDecodedPixelData = DecodePixelRed(m_fRedBase + 128, m_fRedMultiplier,
-						m_uiRedModifierTableIndex,
-						m_auiRedSelectors[uiPixel]);
-				}
-				else
-				{
-					assert(0);
-				}
-				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fDecodedPixelData, 0.0f, 0.0f, 1.0f);
-			}
-			CalcBlockError();
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_R11::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-		m_mode = MODE_R11;
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			m_fError = FLT_MAX;
-			m_fRedBlockError = FLT_MAX;		// artificially high value
-			CalculateR11(8, 0.0f, 0.0f);
-			m_fError = m_fRedBlockError;
-			break;
-
-		case 1:
-			CalculateR11(8, 2.0f, 1.0f);
-			m_fError = m_fRedBlockError;
-			if (a_fEffort <= 24.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 2:
-			CalculateR11(8, 12.0f, 1.0f);
-			m_fError = m_fRedBlockError;
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 3:
-			CalculateR11(7, 6.0f, 1.0f);
-			m_fError = m_fRedBlockError;
-			break;
-
-		case 4:
-			CalculateR11(6, 3.0f, 1.0f);
-			m_fError = m_fRedBlockError;
-			break;
-
-		case 5:
-			CalculateR11(5, 1.0f, 0.0f);
-			m_fError = m_fRedBlockError;
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find the best combination of base color, multiplier and selectors
-	//
-	// a_uiSelectorsUsed limits the number of selector combinations to try
-	// a_fBaseRadius limits the range of base colors to try
-	// a_fMultiplierRadius limits the range of multipliers to try
-	//
-	void Block4x4Encoding_R11::CalculateR11(unsigned int a_uiSelectorsUsed, 
-												float a_fBaseRadius, float a_fMultiplierRadius)
-	{
-		// maps from virtual (monotonic) selector to ETC selector
-		static const unsigned int auiVirtualSelectorMap[8] = {3, 2, 1, 0, 4, 5, 6, 7};
-
-		// find min/max red
-		float fMinRed = 1.0f;
-		float fMaxRed = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			// ignore border pixels
-			float fAlpha = m_pafrgbaSource[uiPixel].fA;
-			if (isnan(fAlpha))
-			{
-				continue;
-			}
-
-			float fRed = m_pafrgbaSource[uiPixel].fR;
-
-			if (fRed < fMinRed)
-			{
-				fMinRed = fRed;
-			}
-			if (fRed > fMaxRed)
-			{
-				fMaxRed = fRed;
-			}
-		}
-		assert(fMinRed <= fMaxRed);
-
-		float fRedRange = (fMaxRed - fMinRed);
-
-		// try each modifier table entry							  
-		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
-		{
-			for (unsigned int uiMinVirtualSelector = 0; 
-					uiMinVirtualSelector <= (8- a_uiSelectorsUsed); 
-					uiMinVirtualSelector++)
-			{
-				unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1;
-
-				unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector];
-				unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector];
-
-				float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector];
-
-				float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] -
-											s_aafModifierTable[uiTableEntry][uiMinSelector];
-
-				float fCenterRatio = fTableEntryCenter / fTableEntryRange;
-
-				float fCenter = fMinRed + fCenterRatio*fRedRange;
-				fCenter = roundf(255.0f * fCenter) / 255.0f;
-
-				float fMinBase = fCenter - (a_fBaseRadius / 255.0f);
-				if (fMinBase < 0.0f)
-				{
-					fMinBase = 0.0f;
-				}
-
-				float fMaxBase = fCenter + (a_fBaseRadius / 255.0f);
-				if (fMaxBase > 1.0f)
-				{
-					fMaxBase = 1.0f;
-				}
-
-				for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
-				{
-					float fRangeMultiplier = roundf(fRedRange / fTableEntryRange);
-
-					float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius;
-					if (fMinMultiplier < 1.0f)
-					{
-						fMinMultiplier = 0.0f;
-					}
-					else if (fMinMultiplier > 15.0f)
-					{
-						fMinMultiplier = 15.0f;
-					}
-
-					float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius;
-					if (fMaxMultiplier < 1.0f)
-					{
-						fMaxMultiplier = 1.0f;
-					}
-					else if (fMaxMultiplier > 15.0f)
-					{
-						fMaxMultiplier = 15.0f;
-					}
-
-					for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
-					{
-						// find best selector for each pixel
-						unsigned int auiBestSelectors[PIXELS];
-						float afBestRedError[PIXELS];
-						float afBestPixelRed[PIXELS];
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							float fBestPixelRedError = FLT_MAX;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								float fPixelRed = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector);
-
-								ColorFloatRGBA frgba(fPixelRed, m_pafrgbaSource[uiPixel].fG,0.0f,1.0f);
-
-								float fPixelRedError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]);
-
-								if (fPixelRedError < fBestPixelRedError)
-								{
-									fBestPixelRedError = fPixelRedError;
-									auiBestSelectors[uiPixel] = uiSelector;
-									afBestRedError[uiPixel] = fBestPixelRedError;
-									afBestPixelRed[uiPixel] = fPixelRed;
-								}
-							}
-						}
-						float fBlockError = 0.0f;  
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							fBlockError += afBestRedError[uiPixel];
-						}
-						if (fBlockError < m_fRedBlockError)
-						{
-							m_fRedBlockError = fBlockError;
-
-							if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-							{
-								m_fRedBase = 255.0f * fBase;
-							}
-							else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-							{
-								m_fRedBase = (fBase * 255) - 128;
-							}
-							else
-							{
-								assert(0);
-							}
-							m_fRedMultiplier = fMultiplier;
-							m_uiRedModifierTableIndex = uiTableEntry;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiRedSelectors[uiPixel] = auiBestSelectors[uiPixel];
-								float fBestPixelRed = afBestPixelRed[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fBestPixelRed, 0.0f, 0.0f, 1.0f);
-								m_afDecodedAlphas[uiPixel] = 1.0f;
-							}
-						}
-					}
-				}
-
-			}
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_R11::SetEncodingBits(void)
-	{
-		if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-		{
-			m_pencodingbitsR11->data.base = (unsigned char)roundf(m_fRedBase);
-		}
-		else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-		{
-			m_pencodingbitsR11->data.base = (signed char)roundf(m_fRedBase);
-		}
-		else
-		{
-			assert(0);
-		}
-		m_pencodingbitsR11->data.table = m_uiRedModifierTableIndex;
-		m_pencodingbitsR11->data.multiplier = (unsigned char)roundf(m_fRedMultiplier);
-
-		unsigned long long int ulliSelectorBits = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiShift = 45 - (3 * uiPixel);
-			ulliSelectorBits |= ((unsigned long long int)m_auiRedSelectors[uiPixel]) << uiShift;
-		}
-
-		m_pencodingbitsR11->data.selectors0 = ulliSelectorBits >> 40;
-		m_pencodingbitsR11->data.selectors1 = ulliSelectorBits >> 32;
-		m_pencodingbitsR11->data.selectors2 = ulliSelectorBits >> 24;
-		m_pencodingbitsR11->data.selectors3 = ulliSelectorBits >> 16;
-		m_pencodingbitsR11->data.selectors4 = ulliSelectorBits >> 8;
-		m_pencodingbitsR11->data.selectors5 = ulliSelectorBits;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.h
deleted file mode 100644
index b40c1e0036..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_RGB8.h"
-
-namespace Etc
-{
-	class Block4x4EncodingBits_R11;
-
-	// ################################################################################
-	// Block4x4Encoding_R11
-	// ################################################################################
-
-	class Block4x4Encoding_R11 : public Block4x4Encoding_RGB8
-	{
-	public:
-
-		Block4x4Encoding_R11(void);
-		virtual ~Block4x4Encoding_R11(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-			ColorFloatRGBA *a_pafrgbaSource,
-			unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-			unsigned char *a_paucEncodingBits,
-			ColorFloatRGBA *a_pafrgbaSource,
-			ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-		inline float GetRedBase(void) const
-		{
-			return m_fRedBase;
-		}
-
-		inline float GetRedMultiplier(void) const
-		{
-			return m_fRedMultiplier;
-		}
-
-		inline int GetRedTableIndex(void) const
-		{
-			return m_uiRedModifierTableIndex;
-		}
-
-		inline const unsigned int * GetRedSelectors(void) const
-		{
-			return m_auiRedSelectors;
-		}
-
-	protected:
-
-		static const unsigned int MODIFIER_TABLE_ENTRYS = 16;
-		static const unsigned int SELECTOR_BITS = 3;
-		static const unsigned int SELECTORS = 1 << SELECTOR_BITS;
-
-		static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS];
-
-		void CalculateR11(unsigned int a_uiSelectorsUsed, 
-							float a_fBaseRadius, float a_fMultiplierRadius);
-
-		
-
-	
-		inline float DecodePixelRed(float a_fBase, float a_fMultiplier,
-			unsigned int a_uiTableIndex, unsigned int a_uiSelector)
-		{
-			float fMultiplier = a_fMultiplier;
-			if (fMultiplier <= 0.0f)
-			{
-				fMultiplier = 1.0f / 8.0f;
-			}
-
-			float fPixelRed = a_fBase * 8 + 4 +
-				8 * fMultiplier*s_aafModifierTable[a_uiTableIndex][a_uiSelector]*255;
-			fPixelRed /= 2047.0f;
-
-			if (fPixelRed < 0.0f)
-			{
-				fPixelRed = 0.0f;
-			}
-			else if (fPixelRed > 1.0f)
-			{
-				fPixelRed = 1.0f;
-			}
-
-			return fPixelRed;
-		}
-
-		Block4x4EncodingBits_R11 *m_pencodingbitsR11;
-
-		float m_fRedBase;
-		float m_fRedMultiplier;
-		float m_fRedBlockError;
-		unsigned int m_uiRedModifierTableIndex;
-		unsigned int m_auiRedSelectors[PIXELS];
-
-		
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.cpp
deleted file mode 100644
index 417835db51..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.cpp
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_RG11.cpp
-
-Block4x4Encoding_RG11 is the encoder to use when targetting file format RG11 and SRG11 (signed RG11).
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_RG11.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_RG11::Block4x4Encoding_RG11(void)
-	{
-		m_pencodingbitsRG11 = nullptr;
-	}
-
-	Block4x4Encoding_RG11::~Block4x4Encoding_RG11(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_RG11::InitFromSource(Block4x4 *a_pblockParent,
-		ColorFloatRGBA *a_pafrgbaSource,
-		unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
-	{
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-
-		m_pencodingbitsRG11 = (Block4x4EncodingBits_RG11 *)a_paucEncodingBits;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_RG11::InitFromEncodingBits(Block4x4 *a_pblockParent,
-		unsigned char *a_paucEncodingBits,
-		ColorFloatRGBA *a_pafrgbaSource,
-		ErrorMetric a_errormetric)
-	{
-
-		m_pencodingbitsRG11 = (Block4x4EncodingBits_RG11 *)a_paucEncodingBits;
-
-		// init RGB portion
-		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
-			(unsigned char *)m_pencodingbitsRG11,
-			a_pafrgbaSource,
-			a_errormetric);
-		m_fError = 0.0f;
-
-		{
-			m_mode = MODE_RG11;
-			if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-			{
-				m_fRedBase = (float)(signed char)m_pencodingbitsRG11->data.baseR;
-				m_fGrnBase = (float)(signed char)m_pencodingbitsRG11->data.baseG;
-			}
-			else
-			{
-				m_fRedBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseR;
-				m_fGrnBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseG;
-			}
-			m_fRedMultiplier = (float)m_pencodingbitsRG11->data.multiplierR;
-			m_fGrnMultiplier = (float)m_pencodingbitsRG11->data.multiplierG;
-			m_uiRedModifierTableIndex = m_pencodingbitsRG11->data.tableIndexR;
-			m_uiGrnModifierTableIndex = m_pencodingbitsRG11->data.tableIndexG;
-
-			unsigned long long int ulliSelectorBitsR = 0;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR0 << 40;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR1 << 32;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR2 << 24;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR3 << 16;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR4 << 8;
-			ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR5;
-
-			unsigned long long int ulliSelectorBitsG = 0;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG0 << 40;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG1 << 32;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG2 << 24;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG3 << 16;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG4 << 8;
-			ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG5;
-
-			
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				unsigned int uiShift = 45 - (3 * uiPixel);
-				m_auiRedSelectors[uiPixel] = (ulliSelectorBitsR >> uiShift) & (SELECTORS - 1);
-				m_auiGrnSelectors[uiPixel] = (ulliSelectorBitsG >> uiShift) & (SELECTORS - 1);
-			}
-
-			
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				float fRedDecodedData = 0.0f;
-				float fGrnDecodedData = 0.0f;
-				if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-				{
-					fRedDecodedData = DecodePixelRed(m_fRedBase, m_fRedMultiplier, m_uiRedModifierTableIndex, m_auiRedSelectors[uiPixel]);
-					fGrnDecodedData = DecodePixelRed(m_fGrnBase, m_fGrnMultiplier, m_uiGrnModifierTableIndex, m_auiGrnSelectors[uiPixel]);
-				}
-				else if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-				{
-					fRedDecodedData = DecodePixelRed(m_fRedBase + 128, m_fRedMultiplier, m_uiRedModifierTableIndex, m_auiRedSelectors[uiPixel]);
-					fGrnDecodedData = DecodePixelRed(m_fGrnBase + 128, m_fGrnMultiplier, m_uiGrnModifierTableIndex, m_auiGrnSelectors[uiPixel]);
-				}
-				else
-				{
-					assert(0);
-				}
-				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fRedDecodedData, fGrnDecodedData, 0.0f, 1.0f);
-			}
-
-		}
-
-		CalcBlockError();
- 	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RG11::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			m_fError = FLT_MAX;
-			m_fGrnBlockError = FLT_MAX;		// artificially high value
-			m_fRedBlockError = FLT_MAX;
-			CalculateR11(8, 0.0f, 0.0f);
-			CalculateG11(8, 0.0f, 0.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			break;
-
-		case 1:
-			CalculateR11(8, 2.0f, 1.0f);
-			CalculateG11(8, 2.0f, 1.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			if (a_fEffort <= 24.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 2:
-			CalculateR11(8, 12.0f, 1.0f);
-			CalculateG11(8, 12.0f, 1.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 3:
-			CalculateR11(7, 6.0f, 1.0f);
-			CalculateG11(7, 6.0f, 1.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			break;
-
-		case 4:
-			CalculateR11(6, 3.0f, 1.0f);
-			CalculateG11(6, 3.0f, 1.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			break;
-
-		case 5:
-			CalculateR11(5, 1.0f, 0.0f);
-			CalculateG11(5, 1.0f, 0.0f);
-			m_fError = (m_fGrnBlockError + m_fRedBlockError);
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find the best combination of base color, multiplier and selectors
-	//
-	// a_uiSelectorsUsed limits the number of selector combinations to try
-	// a_fBaseRadius limits the range of base colors to try
-	// a_fMultiplierRadius limits the range of multipliers to try
-	//
-	void Block4x4Encoding_RG11::CalculateG11(unsigned int a_uiSelectorsUsed,
-		float a_fBaseRadius, float a_fMultiplierRadius)
-	{
-		// maps from virtual (monotonic) selector to etc selector
-		static const unsigned int auiVirtualSelectorMap[8] = { 3, 2, 1, 0, 4, 5, 6, 7 };
-
-		// find min/max Grn
-		float fMinGrn = 1.0f;
-		float fMaxGrn = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			// ignore border pixels
-			float fAlpha = m_pafrgbaSource[uiPixel].fA;
-			if (isnan(fAlpha))
-			{
-				continue;
-			}
-
-			float fGrn = m_pafrgbaSource[uiPixel].fG;
-
-			if (fGrn < fMinGrn)
-			{
-				fMinGrn = fGrn;
-			}
-			if (fGrn > fMaxGrn)
-			{
-				fMaxGrn = fGrn;
-			}
-		}
-		assert(fMinGrn <= fMaxGrn);
-
-		float fGrnRange = (fMaxGrn - fMinGrn);
-
-		// try each modifier table entry							  
-		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
-		{
-			for (unsigned int uiMinVirtualSelector = 0;
-			uiMinVirtualSelector <= (8 - a_uiSelectorsUsed);
-				uiMinVirtualSelector++)
-			{
-				unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1;
-
-				unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector];
-				unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector];
-
-				float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector];
-
-				float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] -
-					s_aafModifierTable[uiTableEntry][uiMinSelector];
-
-				float fCenterRatio = fTableEntryCenter / fTableEntryRange;
-
-				float fCenter = fMinGrn + fCenterRatio*fGrnRange;
-				fCenter = roundf(255.0f * fCenter) / 255.0f;
-
-				float fMinBase = fCenter - (a_fBaseRadius / 255.0f);
-				if (fMinBase < 0.0f)
-				{
-					fMinBase = 0.0f;
-				}
-
-				float fMaxBase = fCenter + (a_fBaseRadius / 255.0f);
-				if (fMaxBase > 1.0f)
-				{
-					fMaxBase = 1.0f;
-				}
-
-				for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
-				{
-					float fRangeMultiplier = roundf(fGrnRange / fTableEntryRange);
-
-					float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius;
-					if (fMinMultiplier < 1.0f)
-					{
-						fMinMultiplier = 0.0f;
-					}
-					else if (fMinMultiplier > 15.0f)
-					{
-						fMinMultiplier = 15.0f;
-					}
-
-					float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius;
-					if (fMaxMultiplier < 1.0f)
-					{
-						fMaxMultiplier = 1.0f;
-					}
-					else if (fMaxMultiplier > 15.0f)
-					{
-						fMaxMultiplier = 15.0f;
-					}
-
-					for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
-					{
-						// find best selector for each pixel
-						unsigned int auiBestSelectors[PIXELS];
-						float afBestGrnError[PIXELS];
-						float afBestPixelGrn[PIXELS];
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							float fBestPixelGrnError = FLT_MAX;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								//DecodePixelRed is not red channel specific
-								float fPixelGrn = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector);
-								
-								ColorFloatRGBA frgba(m_pafrgbaSource[uiPixel].fR, fPixelGrn, 0.0f, 1.0f);
-									
-								float fPixelGrnError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]);
-
-								if (fPixelGrnError < fBestPixelGrnError)
-								{
-									fBestPixelGrnError = fPixelGrnError;
-									auiBestSelectors[uiPixel] = uiSelector;
-									afBestGrnError[uiPixel] = fBestPixelGrnError;
-									afBestPixelGrn[uiPixel] = fPixelGrn;
-								}
-							}
-						}
-						float fBlockError = 0.0f;
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							fBlockError += afBestGrnError[uiPixel];
-						}
-
-						if (fBlockError < m_fGrnBlockError)
-						{
-							m_fGrnBlockError = fBlockError;
-
-							if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-							{
-								m_fGrnBase = 255.0f * fBase;
-							}
-							else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-							{
-								m_fGrnBase = (fBase * 255) - 128;
-							}
-							else
-							{
-								assert(0);
-							}
-							m_fGrnMultiplier = fMultiplier;
-							m_uiGrnModifierTableIndex = uiTableEntry;
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiGrnSelectors[uiPixel] = auiBestSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel].fG = afBestPixelGrn[uiPixel];
-								m_afDecodedAlphas[uiPixel] = 1.0f;
-							}
-						}
-					}
-				}
-
-			}
-		}
-	}
-	
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RG11::SetEncodingBits(void)
-	{
-		unsigned long long int ulliSelectorBitsR = 0;
-		unsigned long long int ulliSelectorBitsG = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiShift = 45 - (3 * uiPixel);
-			ulliSelectorBitsR |= ((unsigned long long int)m_auiRedSelectors[uiPixel]) << uiShift;
-			ulliSelectorBitsG |= ((unsigned long long int)m_auiGrnSelectors[uiPixel]) << uiShift;
-		}
-		if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-		{
-			m_pencodingbitsRG11->data.baseR = (unsigned char)roundf(m_fRedBase);
-		}
-		else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-		{
-			m_pencodingbitsRG11->data.baseR = (signed char)roundf(m_fRedBase);
-		}
-		else
-		{
-			assert(0);
-		}
-		m_pencodingbitsRG11->data.tableIndexR = m_uiRedModifierTableIndex;
-		m_pencodingbitsRG11->data.multiplierR = (unsigned char)roundf(m_fRedMultiplier);
-
-		m_pencodingbitsRG11->data.selectorsR0 = ulliSelectorBitsR >> 40;
-		m_pencodingbitsRG11->data.selectorsR1 = ulliSelectorBitsR >> 32;
-		m_pencodingbitsRG11->data.selectorsR2 = ulliSelectorBitsR >> 24;
-		m_pencodingbitsRG11->data.selectorsR3 = ulliSelectorBitsR >> 16;
-		m_pencodingbitsRG11->data.selectorsR4 = ulliSelectorBitsR >> 8;
-		m_pencodingbitsRG11->data.selectorsR5 = ulliSelectorBitsR;
-
-		if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
-		{
-			m_pencodingbitsRG11->data.baseG = (unsigned char)roundf(m_fGrnBase);
-		}
-		else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
-		{
-			m_pencodingbitsRG11->data.baseG = (signed char)roundf(m_fGrnBase);
-		}
-		else
-		{
-			assert(0);
-		}
-		m_pencodingbitsRG11->data.tableIndexG = m_uiGrnModifierTableIndex;
-		m_pencodingbitsRG11->data.multiplierG = (unsigned char)roundf(m_fGrnMultiplier);
-
-		m_pencodingbitsRG11->data.selectorsG0 = ulliSelectorBitsG >> 40;
-		m_pencodingbitsRG11->data.selectorsG1 = ulliSelectorBitsG >> 32;
-		m_pencodingbitsRG11->data.selectorsG2 = ulliSelectorBitsG >> 24;
-		m_pencodingbitsRG11->data.selectorsG3 = ulliSelectorBitsG >> 16;
-		m_pencodingbitsRG11->data.selectorsG4 = ulliSelectorBitsG >> 8;
-		m_pencodingbitsRG11->data.selectorsG5 = ulliSelectorBitsG;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.h
deleted file mode 100644
index d4993b8c5f..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_RGB8.h"
-#include "EtcBlock4x4Encoding_R11.h"
-
-namespace Etc
-{
-	class Block4x4EncodingBits_RG11;
-
-	// ################################################################################
-	// Block4x4Encoding_RG11
-	// ################################################################################
-
-	class Block4x4Encoding_RG11 : public Block4x4Encoding_R11
-	{
-		float m_fGrnBase;
-		float m_fGrnMultiplier;
-		float m_fGrnBlockError;
-		unsigned int m_auiGrnSelectors[PIXELS];
-		unsigned int m_uiGrnModifierTableIndex;
-	public:
-
-		Block4x4Encoding_RG11(void);
-		virtual ~Block4x4Encoding_RG11(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-			ColorFloatRGBA *a_pafrgbaSource,
-
-			unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-			unsigned char *a_paucEncodingBits,
-			ColorFloatRGBA *a_pafrgbaSource,
-
-			ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-		Block4x4EncodingBits_RG11 *m_pencodingbitsRG11;
-
-		void CalculateG11(unsigned int a_uiSelectorsUsed, float a_fBaseRadius, float a_fMultiplierRadius);
-
-		inline float GetGrnBase(void) const
-		{
-			return m_fGrnBase;
-		}
-
-		inline float GetGrnMultiplier(void) const
-		{
-			return m_fGrnMultiplier;
-		}
-
-		inline int GetGrnTableIndex(void) const
-		{
-			return m_uiGrnModifierTableIndex;
-		}
-
-		inline const unsigned int * GetGrnSelectors(void) const
-		{
-			return m_auiGrnSelectors;
-		}
-
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
deleted file mode 100644
index 5c7ebed788..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
+++ /dev/null
@@ -1,1730 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_RGB8.cpp
-
-Block4x4Encoding_RGB8 is the encoder to use for the ETC2 extensions when targetting file format RGB8.  
-This encoder is also used for the ETC2 subset of file format RGBA8.
-
-Block4x4Encoding_ETC1 encodes the ETC1 subset of RGB8.
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_RGB8.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-#include "EtcMath.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-	float Block4x4Encoding_RGB8::s_afTHDistanceTable[TH_DISTANCES] =
-	{
-		3.0f / 255.0f,
-		6.0f / 255.0f,
-		11.0f / 255.0f,
-		16.0f / 255.0f,
-		23.0f / 255.0f,
-		32.0f / 255.0f,
-		41.0f / 255.0f,
-		64.0f / 255.0f
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_RGB8::Block4x4Encoding_RGB8(void)
-	{
-
-		m_pencodingbitsRGB8 = nullptr;
-
-	}
-
-	Block4x4Encoding_RGB8::~Block4x4Encoding_RGB8(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_RGB8::InitFromEncodingBits(Block4x4 *a_pblockParent,
-														unsigned char *a_paucEncodingBits,
-														ColorFloatRGBA *a_pafrgbaSource,
-														ErrorMetric a_errormetric)
-	{
-		
-		// handle ETC1 modes
-		Block4x4Encoding_ETC1::InitFromEncodingBits(a_pblockParent,
-													a_paucEncodingBits, a_pafrgbaSource,a_errormetric);
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
-
-		// detect if there is a T, H or Planar mode present
-		if (m_pencodingbitsRGB8->differential.diff)
-		{
-			int iRed1 = (int)m_pencodingbitsRGB8->differential.red1;
-			int iDRed2 = m_pencodingbitsRGB8->differential.dred2;
-			int iRed2 = iRed1 + iDRed2;
-
-			int iGreen1 = (int)m_pencodingbitsRGB8->differential.green1;
-			int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2;
-			int iGreen2 = iGreen1 + iDGreen2;
-
-			int iBlue1 = (int)m_pencodingbitsRGB8->differential.blue1;
-			int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2;
-			int iBlue2 = iBlue1 + iDBlue2;
-
-			if (iRed2 < 0 || iRed2 > 31)
-			{
-				InitFromEncodingBits_T();
-			}
-			else if (iGreen2 < 0 || iGreen2 > 31)
-			{
-				InitFromEncodingBits_H();
-			}
-			else if (iBlue2 < 0 || iBlue2 > 31)
-			{
-				InitFromEncodingBits_Planar();
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if T mode is detected
-	//
-	void Block4x4Encoding_RGB8::InitFromEncodingBits_T(void)
-	{
-
-		m_mode = MODE_T;
-
-		unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) +
-								m_pencodingbitsRGB8->t.red1b);
-		unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1;
-		unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1;
-
-		unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2;
-		unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2;
-		unsigned char ucBlue2 = m_pencodingbitsRGB8->t.blue2;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);
-
-		m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db;
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		DecodePixels_T();
-
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if H mode is detected
-	//
-	void Block4x4Encoding_RGB8::InitFromEncodingBits_H(void)
-	{
-
-		m_mode = MODE_H;
-		
-		unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1;
-		unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) +
-									m_pencodingbitsRGB8->h.green1b);
-		unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) +
-								(m_pencodingbitsRGB8->h.blue1b << 1) + 
-								m_pencodingbitsRGB8->h.blue1c);
-
-		unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2;
-		unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) +
-									m_pencodingbitsRGB8->h.green2b);
-		unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);
-
-		// used to determine the LSB of the CW
-		unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1);
-		unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2);
-
-		m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1);
-		if (uiRGB1 >= uiRGB2)
-		{
-			m_uiCW1++;
-		}
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		DecodePixels_H();
-
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if Planar mode is detected
-	//
-	void Block4x4Encoding_RGB8::InitFromEncodingBits_Planar(void)
-	{
-
-		m_mode = MODE_PLANAR;
-
-		unsigned char ucOriginRed = m_pencodingbitsRGB8->planar.originRed;
-		unsigned char ucOriginGreen = (unsigned char)((m_pencodingbitsRGB8->planar.originGreen1 << 6) +
-										m_pencodingbitsRGB8->planar.originGreen2);
-		unsigned char ucOriginBlue = (unsigned char)((m_pencodingbitsRGB8->planar.originBlue1 << 5) +
-										(m_pencodingbitsRGB8->planar.originBlue2 << 3) +
-										(m_pencodingbitsRGB8->planar.originBlue3 << 1) +
-										m_pencodingbitsRGB8->planar.originBlue4);
-
-		unsigned char ucHorizRed = (unsigned char)((m_pencodingbitsRGB8->planar.horizRed1 << 1) +
-									m_pencodingbitsRGB8->planar.horizRed2);
-		unsigned char ucHorizGreen = m_pencodingbitsRGB8->planar.horizGreen;
-		unsigned char ucHorizBlue = (unsigned char)((m_pencodingbitsRGB8->planar.horizBlue1 << 5) +
-									m_pencodingbitsRGB8->planar.horizBlue2);
-
-		unsigned char ucVertRed = (unsigned char)((m_pencodingbitsRGB8->planar.vertRed1 << 3) +
-									m_pencodingbitsRGB8->planar.vertRed2);
-		unsigned char ucVertGreen = (unsigned char)((m_pencodingbitsRGB8->planar.vertGreen1 << 2) +
-									m_pencodingbitsRGB8->planar.vertGreen2);
-		unsigned char ucVertBlue = m_pencodingbitsRGB8->planar.vertBlue;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromR6G7B6(ucOriginRed, ucOriginGreen, ucOriginBlue);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromR6G7B6(ucHorizRed, ucHorizGreen, ucHorizBlue);
-		m_frgbaColor3 = ColorFloatRGBA::ConvertFromR6G7B6(ucVertRed, ucVertGreen, ucVertBlue);
-
-		DecodePixels_Planar();
-
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGB8::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			Block4x4Encoding_ETC1::PerformFirstIteration();
-			if (m_boolDone)
-			{
-				break;
-			}
-			TryPlanar(0);
-			SetDoneIfPerfect();
-			if (m_boolDone)
-			{
-				break;
-			}
-			TryTAndH(0);
-			break;
-
-		case 1:
-			Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 2:
-			Block4x4Encoding_ETC1::TryIndividual(m_boolMostLikelyFlip, 1);
-			break;
-
-		case 3:
-			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 4:
-			Block4x4Encoding_ETC1::TryIndividual(!m_boolMostLikelyFlip, 1);
-			break;
-
-		case 5:
-			TryPlanar(1);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 6:
-			TryTAndH(1);
-			if (a_fEffort <= 59.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 7:
-			Block4x4Encoding_ETC1::TryDegenerates1();
-			if (a_fEffort <= 69.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 8:
-			Block4x4Encoding_ETC1::TryDegenerates2();
-			if (a_fEffort <= 79.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 9:
-			Block4x4Encoding_ETC1::TryDegenerates3();
-			if (a_fEffort <= 89.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 10:
-			Block4x4Encoding_ETC1::TryDegenerates4();
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in Planar mode
-	// save this encoding if it improves the error
-	//
-	void Block4x4Encoding_RGB8::TryPlanar(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_PLANAR;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-		}
-
-		encodingTry.CalculatePlanarCornerColors();
-
-		encodingTry.DecodePixels_Planar();
-
-		encodingTry.CalcBlockError();
-
-		if (a_uiRadius > 0)
-		{
-			encodingTry.TwiddlePlanar();
-		}
-
-		if (encodingTry.m_fError < m_fError)
-		{
-			m_mode = MODE_PLANAR;
-			m_boolDiff = true;
-			m_boolFlip = false;
-			m_frgbaColor1 = encodingTry.m_frgbaColor1;
-			m_frgbaColor2 = encodingTry.m_frgbaColor2;
-			m_frgbaColor3 = encodingTry.m_frgbaColor3;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-			}
-
-			m_fError = encodingTry.m_fError;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in T mode or H mode
-	// save this encoding if it improves the error
-	//
-	void Block4x4Encoding_RGB8::TryTAndH(unsigned int a_uiRadius)
-	{
-
-		CalculateBaseColorsForTAndH();
-
-		TryT(a_uiRadius);
-
-		TryH(a_uiRadius);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate original values for base colors
-	// store them in m_frgbaOriginalColor1 and m_frgbaOriginalColor2
-	//
-	void Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH(void)
-	{
-
-		bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == ErrorMetric::RGBX;
-
-		ColorFloatRGBA frgbaBlockAverage = (m_frgbaSourceAverageLeft + m_frgbaSourceAverageRight) * 0.5f;
-
-		// find pixel farthest from average gray line
-		unsigned int uiFarthestPixel = 0;
-		float fFarthestGrayDistance2 = 0.0f;
-		unsigned int uiTransparentPixels = 0;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			// don't count transparent
-			if (m_pafrgbaSource[uiPixel].fA == 0.0f && !boolRGBX)
-			{
-				uiTransparentPixels++;
-			}
-			else
-			{
-				float fGrayDistance2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], frgbaBlockAverage);
-
-				if (fGrayDistance2 > fFarthestGrayDistance2)
-				{
-					uiFarthestPixel = uiPixel;
-					fFarthestGrayDistance2 = fGrayDistance2;
-				}
-			}
-		}
-		// a transparent block should not reach this method
-		assert(uiTransparentPixels < PIXELS);
-
-		// set the original base colors to:
-		//		half way to the farthest pixel and
-		//		the mirror color on the other side of the average
-		ColorFloatRGBA frgbaOffset = (m_pafrgbaSource[uiFarthestPixel] - frgbaBlockAverage) * 0.5f;
-		m_frgbaOriginalColor1_TAndH = (frgbaBlockAverage + frgbaOffset).QuantizeR4G4B4();
-		m_frgbaOriginalColor2_TAndH = (frgbaBlockAverage - frgbaOffset).ClampRGB().QuantizeR4G4B4();	// the "other side" might be out of range
-
-		// move base colors to find best fit
-		for (unsigned int uiIteration = 0; uiIteration < 10; uiIteration++)
-		{
-			// find the center of pixels closest to each color
-			float fPixelsCloserToColor1 = 0.0f;
-			ColorFloatRGBA frgbSumPixelsCloserToColor1;
-			float fPixelsCloserToColor2 = 0.0f;
-			ColorFloatRGBA frgbSumPixelsCloserToColor2;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				// don't count transparent pixels
-				if (m_pafrgbaSource[uiPixel].fA == 0.0f)
-				{
-					continue;
-				}
-
-				float fGrayDistance2ToColor1 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor1_TAndH);
-				float fGrayDistance2ToColor2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor2_TAndH);
-
-				ColorFloatRGBA frgbaAlphaWeightedSource = m_pafrgbaSource[uiPixel] * m_pafrgbaSource[uiPixel].fA;
-					
-				if (fGrayDistance2ToColor1 <= fGrayDistance2ToColor2)
-				{
-					fPixelsCloserToColor1 += m_pafrgbaSource[uiPixel].fA;
-					frgbSumPixelsCloserToColor1 = frgbSumPixelsCloserToColor1 + frgbaAlphaWeightedSource;
-				}
-				else
-				{
-					fPixelsCloserToColor2 += m_pafrgbaSource[uiPixel].fA;
-					frgbSumPixelsCloserToColor2 = frgbSumPixelsCloserToColor2 + frgbaAlphaWeightedSource;
-				}
-			}
-			if (fPixelsCloserToColor1 == 0.0f || fPixelsCloserToColor2 == 0.0f)
-			{
-				break;
-			}
-
-			ColorFloatRGBA frgbAvgColor1Pixels = (frgbSumPixelsCloserToColor1 * (1.0f / fPixelsCloserToColor1)).QuantizeR4G4B4();
-			ColorFloatRGBA frgbAvgColor2Pixels = (frgbSumPixelsCloserToColor2 * (1.0f / fPixelsCloserToColor2)).QuantizeR4G4B4();
-
-			if (frgbAvgColor1Pixels.fR == m_frgbaOriginalColor1_TAndH.fR &&
-				frgbAvgColor1Pixels.fG == m_frgbaOriginalColor1_TAndH.fG &&
-				frgbAvgColor1Pixels.fB == m_frgbaOriginalColor1_TAndH.fB &&
-				frgbAvgColor2Pixels.fR == m_frgbaOriginalColor2_TAndH.fR &&
-				frgbAvgColor2Pixels.fG == m_frgbaOriginalColor2_TAndH.fG &&
-				frgbAvgColor2Pixels.fB == m_frgbaOriginalColor2_TAndH.fB)
-			{
-				break;
-			}
-
-			m_frgbaOriginalColor1_TAndH = frgbAvgColor1Pixels;
-			m_frgbaOriginalColor2_TAndH = frgbAvgColor2Pixels;
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in T mode
-	// save this encoding if it improves the error
-	//
-	// since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently
-	// better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower
-	//
-	void Block4x4Encoding_RGB8::TryT(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_T;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-			encodingTry.m_fError = FLT_MAX;
-		}
-
-		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
-		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
-		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
-
-		int iMinRed1 = iColor1Red - (int)a_uiRadius;
-		if (iMinRed1 < 0)
-		{
-			iMinRed1 = 0;
-		}
-		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
-		if (iMaxRed1 > 15)
-		{
-			iMaxRed1 = 15;
-		}
-
-		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-		if (iMinGreen1 < 0)
-		{
-			iMinGreen1 = 0;
-		}
-		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
-		if (iMaxGreen1 > 15)
-		{
-			iMaxGreen1 = 15;
-		}
-
-		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-		if (iMinBlue1 < 0)
-		{
-			iMinBlue1 = 0;
-		}
-		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
-		if (iMaxBlue1 > 15)
-		{
-			iMaxBlue1 = 15;
-		}
-
-		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
-		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
-
-		int iMinRed2 = iColor2Red - (int)a_uiRadius;
-		if (iMinRed2 < 0)
-		{
-			iMinRed2 = 0;
-		}
-		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
-		if (iMaxRed2 > 15)
-		{
-			iMaxRed2 = 15;
-		}
-
-		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-		if (iMinGreen2 < 0)
-		{
-			iMinGreen2 = 0;
-		}
-		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
-		if (iMaxGreen2 > 15)
-		{
-			iMaxGreen2 = 15;
-		}
-
-		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-		if (iMinBlue2 < 0)
-		{
-			iMinBlue2 = 0;
-		}
-		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
-		if (iMaxBlue2 > 15)
-		{
-			iMaxBlue2 = 15;
-		}
-
-		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-		{
-			encodingTry.m_uiCW1 = uiDistance;
-
-			// twiddle m_frgbaOriginalColor2_TAndH
-			// twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector
-			//
-			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
-			{
-				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
-				{
-					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
-					{
-						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
-						{
-							if (uiBaseColorSwaps == 0)
-							{
-								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
-								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-							}
-							else
-							{
-								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH;
-							}
-
-							encodingTry.TryT_BestSelectorCombination();
-
-							if (encodingTry.m_fError < m_fError)
-							{
-								m_mode = encodingTry.m_mode;
-								m_boolDiff = encodingTry.m_boolDiff;
-								m_boolFlip = encodingTry.m_boolFlip;
-
-								m_frgbaColor1 = encodingTry.m_frgbaColor1;
-								m_frgbaColor2 = encodingTry.m_frgbaColor2;
-								m_uiCW1 = encodingTry.m_uiCW1;
-
-								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-								{
-									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-								}
-
-								m_fError = encodingTry.m_fError;
-							}
-						}
-					}
-				}
-			}
-
-			// twiddle m_frgbaOriginalColor1_TAndH
-			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
-			{
-				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
-				{
-					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
-					{
-						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
-						{
-							if (uiBaseColorSwaps == 0)
-							{
-								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
-							}
-							else
-							{
-								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH;
-								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-							}
-
-							encodingTry.TryT_BestSelectorCombination();
-
-							if (encodingTry.m_fError < m_fError)
-							{
-								m_mode = encodingTry.m_mode;
-								m_boolDiff = encodingTry.m_boolDiff;
-								m_boolFlip = encodingTry.m_boolFlip;
-
-								m_frgbaColor1 = encodingTry.m_frgbaColor1;
-								m_frgbaColor2 = encodingTry.m_frgbaColor2;
-								m_uiCW1 = encodingTry.m_uiCW1;
-
-								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-								{
-									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-								}
-
-								m_fError = encodingTry.m_fError;
-							}
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best selector combination for TryT
-	// called on an encodingTry
-	//
-	void Block4x4Encoding_RGB8::TryT_BestSelectorCombination(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-
-		unsigned int auiBestPixelSelectors[PIXELS];
-		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-			FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
-		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
-		
-		assert(SELECTORS == 4);
-		afrgbaDecodedPixel[0] = m_frgbaColor1;
-		afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[2] = m_frgbaColor2;
-		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
-		
-		// try each selector
-		for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-		{
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-
-				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
-														m_pafrgbaSource[uiPixel]);
-
-				if (fPixelError < afBestPixelErrors[uiPixel])
-				{
-					afBestPixelErrors[uiPixel] = fPixelError;
-					auiBestPixelSelectors[uiPixel] = uiSelector;
-					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
-				}
-			}
-		}
-		
-
-		// add up all of the pixel errors
-		float fBlockError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			fBlockError += afBestPixelErrors[uiPixel];
-		}
-
-		if (fBlockError < m_fError)
-		{
-			m_fError = fBlockError;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
-				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in T mode
-	// save this encoding if it improves the error
-	//
-	// since all pixels use the distance table, color1 and color2 can NOT be twiddled independently
-	// TWIDDLE_RADIUS of 2 is WAY too slow
-	//
-	void Block4x4Encoding_RGB8::TryH(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_H;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-			encodingTry.m_fError = FLT_MAX;
-		}
-
-		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
-		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
-		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
-
-		int iMinRed1 = iColor1Red - (int)a_uiRadius;
-		if (iMinRed1 < 0)
-		{
-			iMinRed1 = 0;
-		}
-		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
-		if (iMaxRed1 > 15)
-		{
-			iMaxRed1 = 15;
-		}
-
-		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-		if (iMinGreen1 < 0)
-		{
-			iMinGreen1 = 0;
-		}
-		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
-		if (iMaxGreen1 > 15)
-		{
-			iMaxGreen1 = 15;
-		}
-
-		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-		if (iMinBlue1 < 0)
-		{
-			iMinBlue1 = 0;
-		}
-		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
-		if (iMaxBlue1 > 15)
-		{
-			iMaxBlue1 = 15;
-		}
-
-		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
-		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
-
-		int iMinRed2 = iColor2Red - (int)a_uiRadius;
-		if (iMinRed2 < 0)
-		{
-			iMinRed2 = 0;
-		}
-		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
-		if (iMaxRed2 > 15)
-		{
-			iMaxRed2 = 15;
-		}
-
-		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-		if (iMinGreen2 < 0)
-		{
-			iMinGreen2 = 0;
-		}
-		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
-		if (iMaxGreen2 > 15)
-		{
-			iMaxGreen2 = 15;
-		}
-
-		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-		if (iMinBlue2 < 0)
-		{
-			iMinBlue2 = 0;
-		}
-		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
-		if (iMaxBlue2 > 15)
-		{
-			iMaxBlue2 = 15;
-		}
-
-		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-		{
-			encodingTry.m_uiCW1 = uiDistance;
-
-			// twiddle m_frgbaOriginalColor1_TAndH
-			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
-			{
-				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
-				{
-					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
-					{
-						encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-						encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
-
-						// if color1 == color2, H encoding issues can pop up, so abort
-						if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue)
-						{
-							continue;
-						}
-
-						encodingTry.TryH_BestSelectorCombination();
-
-						if (encodingTry.m_fError < m_fError)
-						{
-							m_mode = encodingTry.m_mode;
-							m_boolDiff = encodingTry.m_boolDiff;
-							m_boolFlip = encodingTry.m_boolFlip;
-
-							m_frgbaColor1 = encodingTry.m_frgbaColor1;
-							m_frgbaColor2 = encodingTry.m_frgbaColor2;
-							m_uiCW1 = encodingTry.m_uiCW1;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-							}
-
-							m_fError = encodingTry.m_fError;
-						}
-					}
-				}
-			}
-
-			// twiddle m_frgbaOriginalColor2_TAndH
-			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
-			{
-				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
-				{
-					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
-					{
-						encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
-						encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-
-						// if color1 == color2, H encoding issues can pop up, so abort
-						if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue)
-						{
-							continue;
-						}
-
-						encodingTry.TryH_BestSelectorCombination();
-
-						if (encodingTry.m_fError < m_fError)
-						{
-							m_mode = encodingTry.m_mode;
-							m_boolDiff = encodingTry.m_boolDiff;
-							m_boolFlip = encodingTry.m_boolFlip;
-
-							m_frgbaColor1 = encodingTry.m_frgbaColor1;
-							m_frgbaColor2 = encodingTry.m_frgbaColor2;
-							m_uiCW1 = encodingTry.m_uiCW1;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-							}
-
-							m_fError = encodingTry.m_fError;
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best selector combination for TryH
-	// called on an encodingTry
-	//
-	void Block4x4Encoding_RGB8::TryH_BestSelectorCombination(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-
-		unsigned int auiBestPixelSelectors[PIXELS];
-		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-			FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
-		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
-		
-		assert(SELECTORS == 4);
-		afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB();
-		afrgbaDecodedPixel[2] = (m_frgbaColor2 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
-		
-		// try each selector
-		for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-		{
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-
-				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
-														m_pafrgbaSource[uiPixel]);
-
-				if (fPixelError < afBestPixelErrors[uiPixel])
-				{
-					afBestPixelErrors[uiPixel] = fPixelError;
-					auiBestPixelSelectors[uiPixel] = uiSelector;
-					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
-				}
-			}
-		}
-		
-
-		// add up all of the pixel errors
-		float fBlockError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			fBlockError += afBestPixelErrors[uiPixel];
-		}
-
-		if (fBlockError < m_fError)
-		{
-			m_fError = fBlockError;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
-				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// use linear regression to find the best fit for colors along the edges of the 4x4 block
-	//
-	void Block4x4Encoding_RGB8::CalculatePlanarCornerColors(void)
-	{
-		ColorFloatRGBA afrgbaRegression[MAX_PLANAR_REGRESSION_SIZE];
-		ColorFloatRGBA frgbaSlope;
-		ColorFloatRGBA frgbaOffset;
-
-		// top edge
-		afrgbaRegression[0] = m_pafrgbaSource[0];
-		afrgbaRegression[1] = m_pafrgbaSource[4];
-		afrgbaRegression[2] = m_pafrgbaSource[8];
-		afrgbaRegression[3] = m_pafrgbaSource[12];
-		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
-		m_frgbaColor1 = frgbaOffset;
-		m_frgbaColor2 = (frgbaSlope * 4.0f) + frgbaOffset;
-
-		// left edge
-		afrgbaRegression[0] = m_pafrgbaSource[0];
-		afrgbaRegression[1] = m_pafrgbaSource[1];
-		afrgbaRegression[2] = m_pafrgbaSource[2];
-		afrgbaRegression[3] = m_pafrgbaSource[3];
-		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
-		m_frgbaColor1 = (m_frgbaColor1 + frgbaOffset) * 0.5f;		// average with top edge
-		m_frgbaColor3 = (frgbaSlope * 4.0f) + frgbaOffset;
-
-		// right edge
-		afrgbaRegression[0] = m_pafrgbaSource[12];
-		afrgbaRegression[1] = m_pafrgbaSource[13];
-		afrgbaRegression[2] = m_pafrgbaSource[14];
-		afrgbaRegression[3] = m_pafrgbaSource[15];
-		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
-		m_frgbaColor2 = (m_frgbaColor2 + frgbaOffset) * 0.5f;		// average with top edge
-
-		// bottom edge
-		afrgbaRegression[0] = m_pafrgbaSource[3];
-		afrgbaRegression[1] = m_pafrgbaSource[7];
-		afrgbaRegression[2] = m_pafrgbaSource[11];
-		afrgbaRegression[3] = m_pafrgbaSource[15];
-		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
-		m_frgbaColor3 = (m_frgbaColor3 + frgbaOffset) * 0.5f;		// average with left edge
-
-		// quantize corner colors to 6/7/6
-		m_frgbaColor1 = m_frgbaColor1.QuantizeR6G7B6();
-		m_frgbaColor2 = m_frgbaColor2.QuantizeR6G7B6();
-		m_frgbaColor3 = m_frgbaColor3.QuantizeR6G7B6();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try different corner colors by slightly changing R, G and B independently
-	//
-	// R, G and B decoding and errors are independent, so R, G and B twiddles can be independent
-	//
-	// return true if improvement
-	//
-	bool Block4x4Encoding_RGB8::TwiddlePlanar(void)
-	{
-		bool boolImprovement = false;
-
-		while (TwiddlePlanarR())
-		{
-			boolImprovement = true;
-		}
-
-		while (TwiddlePlanarG())
-		{
-			boolImprovement = true;
-		}
-
-		while (TwiddlePlanarB())
-		{
-			boolImprovement = true;
-		}
-
-		return boolImprovement;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try different corner colors by slightly changing R
-	//
-	bool Block4x4Encoding_RGB8::TwiddlePlanarR()
-	{
-		bool boolImprovement = false;
-
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_PLANAR;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-		}
-
-		int iOriginRed = encodingTry.m_frgbaColor1.IntRed(63.0f);
-		int iHorizRed = encodingTry.m_frgbaColor2.IntRed(63.0f);
-		int iVertRed = encodingTry.m_frgbaColor3.IntRed(63.0f);
-
-		for (int iTryOriginRed = iOriginRed - 1; iTryOriginRed <= iOriginRed + 1; iTryOriginRed++)
-		{
-			// check for out of range
-			if (iTryOriginRed < 0 || iTryOriginRed > 63)
-			{
-				continue;
-			}
-
-			encodingTry.m_frgbaColor1.fR = ((iTryOriginRed << 2) + (iTryOriginRed >> 4)) / 255.0f;
-
-			for (int iTryHorizRed = iHorizRed - 1; iTryHorizRed <= iHorizRed + 1; iTryHorizRed++)
-			{
-				// check for out of range
-				if (iTryHorizRed < 0 || iTryHorizRed > 63)
-				{
-					continue;
-				}
-
-				encodingTry.m_frgbaColor2.fR = ((iTryHorizRed << 2) + (iTryHorizRed >> 4)) / 255.0f;
-
-				for (int iTryVertRed = iVertRed - 1; iTryVertRed <= iVertRed + 1; iTryVertRed++)
-				{
-					// check for out of range
-					if (iTryVertRed < 0 || iTryVertRed > 63)
-					{
-						continue;
-					}
-
-					// don't bother with null twiddle
-					if (iTryOriginRed == iOriginRed && iTryHorizRed == iHorizRed && iTryVertRed == iVertRed)
-					{
-						continue;
-					}
-
-					encodingTry.m_frgbaColor3.fR = ((iTryVertRed << 2) + (iTryVertRed >> 4)) / 255.0f;
-
-					encodingTry.DecodePixels_Planar();
-
-					encodingTry.CalcBlockError();
-
-					if (encodingTry.m_fError < m_fError)
-					{
-						m_mode = MODE_PLANAR;
-						m_boolDiff = true;
-						m_boolFlip = false;
-						m_frgbaColor1 = encodingTry.m_frgbaColor1;
-						m_frgbaColor2 = encodingTry.m_frgbaColor2;
-						m_frgbaColor3 = encodingTry.m_frgbaColor3;
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-						}
-
-						m_fError = encodingTry.m_fError;
-
-						boolImprovement = true;
-					}
-				}
-			}
-		}
-
-		return boolImprovement;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try different corner colors by slightly changing G
-	//
-	bool Block4x4Encoding_RGB8::TwiddlePlanarG()
-	{
-		bool boolImprovement = false;
-
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_PLANAR;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-		}
-
-		int iOriginGreen = encodingTry.m_frgbaColor1.IntGreen(127.0f);
-		int iHorizGreen = encodingTry.m_frgbaColor2.IntGreen(127.0f);
-		int iVertGreen = encodingTry.m_frgbaColor3.IntGreen(127.0f);
-
-		for (int iTryOriginGreen = iOriginGreen - 1; iTryOriginGreen <= iOriginGreen + 1; iTryOriginGreen++)
-		{
-			// check for out of range
-			if (iTryOriginGreen < 0 || iTryOriginGreen > 127)
-			{
-				continue;
-			}
-
-			encodingTry.m_frgbaColor1.fG = ((iTryOriginGreen << 1) + (iTryOriginGreen >> 6)) / 255.0f;
-
-			for (int iTryHorizGreen = iHorizGreen - 1; iTryHorizGreen <= iHorizGreen + 1; iTryHorizGreen++)
-			{
-				// check for out of range
-				if (iTryHorizGreen < 0 || iTryHorizGreen > 127)
-				{
-					continue;
-				}
-
-				encodingTry.m_frgbaColor2.fG = ((iTryHorizGreen << 1) + (iTryHorizGreen >> 6)) / 255.0f;
-
-				for (int iTryVertGreen = iVertGreen - 1; iTryVertGreen <= iVertGreen + 1; iTryVertGreen++)
-				{
-					// check for out of range
-					if (iTryVertGreen < 0 || iTryVertGreen > 127)
-					{
-						continue;
-					}
-
-					// don't bother with null twiddle
-					if (iTryOriginGreen == iOriginGreen && 
-						iTryHorizGreen == iHorizGreen && 
-						iTryVertGreen == iVertGreen)
-					{
-						continue;
-					}
-
-					encodingTry.m_frgbaColor3.fG = ((iTryVertGreen << 1) + (iTryVertGreen >> 6)) / 255.0f;
-
-					encodingTry.DecodePixels_Planar();
-
-					encodingTry.CalcBlockError();
-
-					if (encodingTry.m_fError < m_fError)
-					{
-						m_mode = MODE_PLANAR;
-						m_boolDiff = true;
-						m_boolFlip = false;
-						m_frgbaColor1 = encodingTry.m_frgbaColor1;
-						m_frgbaColor2 = encodingTry.m_frgbaColor2;
-						m_frgbaColor3 = encodingTry.m_frgbaColor3;
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-						}
-
-						m_fError = encodingTry.m_fError;
-
-						boolImprovement = true;
-					}
-				}
-			}
-		}
-
-		return boolImprovement;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try different corner colors by slightly changing B
-	//
-	bool Block4x4Encoding_RGB8::TwiddlePlanarB()
-	{
-		bool boolImprovement = false;
-
-		Block4x4Encoding_RGB8 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_PLANAR;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-		}
-
-		int iOriginBlue = encodingTry.m_frgbaColor1.IntBlue(63.0f);
-		int iHorizBlue = encodingTry.m_frgbaColor2.IntBlue(63.0f);
-		int iVertBlue = encodingTry.m_frgbaColor3.IntBlue(63.0f);
-
-		for (int iTryOriginBlue = iOriginBlue - 1; iTryOriginBlue <= iOriginBlue + 1; iTryOriginBlue++)
-		{
-			// check for out of range
-			if (iTryOriginBlue < 0 || iTryOriginBlue > 63)
-			{
-				continue;
-			}
-
-			encodingTry.m_frgbaColor1.fB = ((iTryOriginBlue << 2) + (iTryOriginBlue >> 4)) / 255.0f;
-
-			for (int iTryHorizBlue = iHorizBlue - 1; iTryHorizBlue <= iHorizBlue + 1; iTryHorizBlue++)
-			{
-				// check for out of range
-				if (iTryHorizBlue < 0 || iTryHorizBlue > 63)
-				{
-					continue;
-				}
-
-				encodingTry.m_frgbaColor2.fB = ((iTryHorizBlue << 2) + (iTryHorizBlue >> 4)) / 255.0f;
-
-				for (int iTryVertBlue = iVertBlue - 1; iTryVertBlue <= iVertBlue + 1; iTryVertBlue++)
-				{
-					// check for out of range
-					if (iTryVertBlue < 0 || iTryVertBlue > 63)
-					{
-						continue;
-					}
-
-					// don't bother with null twiddle
-					if (iTryOriginBlue == iOriginBlue && iTryHorizBlue == iHorizBlue && iTryVertBlue == iVertBlue)
-					{
-						continue;
-					}
-
-					encodingTry.m_frgbaColor3.fB = ((iTryVertBlue << 2) + (iTryVertBlue >> 4)) / 255.0f;
-
-					encodingTry.DecodePixels_Planar();
-
-					encodingTry.CalcBlockError();
-
-					if (encodingTry.m_fError < m_fError)
-					{
-						m_mode = MODE_PLANAR;
-						m_boolDiff = true;
-						m_boolFlip = false;
-						m_frgbaColor1 = encodingTry.m_frgbaColor1;
-						m_frgbaColor2 = encodingTry.m_frgbaColor2;
-						m_frgbaColor3 = encodingTry.m_frgbaColor3;
-
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-						}
-
-						m_fError = encodingTry.m_fError;
-
-						boolImprovement = true;
-					}
-				}
-			}
-		}
-
-		return boolImprovement;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGB8::SetEncodingBits(void)
-	{
-
-		switch (m_mode)
-		{
-		case MODE_ETC1:
-			Block4x4Encoding_ETC1::SetEncodingBits();
-			break;
-
-		case MODE_T:
-			SetEncodingBits_T();
-			break;
-
-		case MODE_H:
-			SetEncodingBits_H();
-			break;
-
-		case MODE_PLANAR:
-			SetEncodingBits_Planar();
-			break;
-
-		default:
-			assert(false);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state for T mode
-	//
-	void Block4x4Encoding_RGB8::SetEncodingBits_T(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_T);
-		assert(m_boolDiff == true);
-
-		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-
-		m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2;
-		m_pencodingbitsRGB8->t.red1b = uiRed1;
-		m_pencodingbitsRGB8->t.green1 = uiGreen1;
-		m_pencodingbitsRGB8->t.blue1 = uiBlue1;
-
-		m_pencodingbitsRGB8->t.red2 = uiRed2;
-		m_pencodingbitsRGB8->t.green2 = uiGreen2;
-		m_pencodingbitsRGB8->t.blue2 = uiBlue2;
-
-		m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1;
-		m_pencodingbitsRGB8->t.db = m_uiCW1;
-
-		m_pencodingbitsRGB8->t.diff = 1;
-
-		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
-
-		// create an invalid R differential to trigger T mode
-		m_pencodingbitsRGB8->t.detect1 = 0;
-		m_pencodingbitsRGB8->t.detect2 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		if (iRed2 >= 4)
-		{
-			m_pencodingbitsRGB8->t.detect1 = 7;
-			m_pencodingbitsRGB8->t.detect2 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->t.detect1 = 0;
-			m_pencodingbitsRGB8->t.detect2 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-
-			// make sure red overflows
-			assert(iRed2 < 0 || iRed2 > 31);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state for H mode
-	//
-	// colors and selectors may need to swap in order to generate lsb of distance index
-	//
-	void Block4x4Encoding_RGB8::SetEncodingBits_H(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_H);
-		assert(m_boolDiff == true);
-
-		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-
-		unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
-		unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;
-
-		bool boolOddDistance = m_uiCW1 & 1;
-		bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance;
-
-		if (boolSwapColors)
-		{
-			m_pencodingbitsRGB8->h.red1 = uiRed2;
-			m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1;
-			m_pencodingbitsRGB8->h.green1b = uiGreen2;
-			m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3;
-			m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1;
-			m_pencodingbitsRGB8->h.blue1c = uiBlue2;
-
-			m_pencodingbitsRGB8->h.red2 = uiRed1;
-			m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1;
-			m_pencodingbitsRGB8->h.green2b = uiGreen1;
-			m_pencodingbitsRGB8->h.blue2 = uiBlue1;
-
-			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
-			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->h.red1 = uiRed1;
-			m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1;
-			m_pencodingbitsRGB8->h.green1b = uiGreen1;
-			m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3;
-			m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1;
-			m_pencodingbitsRGB8->h.blue1c = uiBlue1;
-
-			m_pencodingbitsRGB8->h.red2 = uiRed2;
-			m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1;
-			m_pencodingbitsRGB8->h.green2b = uiGreen2;
-			m_pencodingbitsRGB8->h.blue2 = uiBlue2;
-
-			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
-			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
-		}
-
-		m_pencodingbitsRGB8->h.diff = 1;
-
-		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
-
-		if (boolSwapColors)
-		{
-			m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF;
-		}
-
-		// create an invalid R differential to trigger T mode
-		m_pencodingbitsRGB8->h.detect1 = 0;
-		m_pencodingbitsRGB8->h.detect2 = 0;
-		m_pencodingbitsRGB8->h.detect3 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-		if (iRed2 < 0 || iRed2 > 31)
-		{
-			m_pencodingbitsRGB8->h.detect1 = 1;
-		}
-		if (iGreen2 >= 4)
-		{
-			m_pencodingbitsRGB8->h.detect2 = 7;
-			m_pencodingbitsRGB8->h.detect3 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->h.detect2 = 0;
-			m_pencodingbitsRGB8->h.detect3 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-
-			// make sure red doesn't overflow and green does
-			assert(iRed2 >= 0 && iRed2 <= 31);
-			assert(iGreen2 < 0 || iGreen2 > 31);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state for Planar mode
-	//
-	void Block4x4Encoding_RGB8::SetEncodingBits_Planar(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_PLANAR);
-		assert(m_boolDiff == true);
-
-		unsigned int uiOriginRed = (unsigned int)m_frgbaColor1.IntRed(63.0f);
-		unsigned int uiOriginGreen = (unsigned int)m_frgbaColor1.IntGreen(127.0f);
-		unsigned int uiOriginBlue = (unsigned int)m_frgbaColor1.IntBlue(63.0f);
-
-		unsigned int uiHorizRed = (unsigned int)m_frgbaColor2.IntRed(63.0f);
-		unsigned int uiHorizGreen = (unsigned int)m_frgbaColor2.IntGreen(127.0f);
-		unsigned int uiHorizBlue = (unsigned int)m_frgbaColor2.IntBlue(63.0f);
-
-		unsigned int uiVertRed = (unsigned int)m_frgbaColor3.IntRed(63.0f);
-		unsigned int uiVertGreen = (unsigned int)m_frgbaColor3.IntGreen(127.0f);
-		unsigned int uiVertBlue = (unsigned int)m_frgbaColor3.IntBlue(63.0f);
-
-		m_pencodingbitsRGB8->planar.originRed = uiOriginRed;
-		m_pencodingbitsRGB8->planar.originGreen1 = uiOriginGreen >> 6;
-		m_pencodingbitsRGB8->planar.originGreen2 = uiOriginGreen;
-		m_pencodingbitsRGB8->planar.originBlue1 = uiOriginBlue >> 5;
-		m_pencodingbitsRGB8->planar.originBlue2 = uiOriginBlue >> 3;
-		m_pencodingbitsRGB8->planar.originBlue3 = uiOriginBlue >> 1;
-		m_pencodingbitsRGB8->planar.originBlue4 = uiOriginBlue;
-
-		m_pencodingbitsRGB8->planar.horizRed1 = uiHorizRed >> 1;
-		m_pencodingbitsRGB8->planar.horizRed2 = uiHorizRed;
-		m_pencodingbitsRGB8->planar.horizGreen = uiHorizGreen;
-		m_pencodingbitsRGB8->planar.horizBlue1 = uiHorizBlue >> 5;
-		m_pencodingbitsRGB8->planar.horizBlue2 = uiHorizBlue;
-
-		m_pencodingbitsRGB8->planar.vertRed1 = uiVertRed >> 3;
-		m_pencodingbitsRGB8->planar.vertRed2 = uiVertRed;
-		m_pencodingbitsRGB8->planar.vertGreen1 = uiVertGreen >> 2;
-		m_pencodingbitsRGB8->planar.vertGreen2 = uiVertGreen;
-		m_pencodingbitsRGB8->planar.vertBlue = uiVertBlue;
-
-		m_pencodingbitsRGB8->planar.diff = 1;
-
-		// create valid RG differentials and an invalid B differential to trigger planar mode
-		m_pencodingbitsRGB8->planar.detect1 = 0;
-		m_pencodingbitsRGB8->planar.detect2 = 0;
-		m_pencodingbitsRGB8->planar.detect3 = 0;
-		m_pencodingbitsRGB8->planar.detect4 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-		int iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
-		if (iRed2 < 0 || iRed2 > 31)
-		{
-			m_pencodingbitsRGB8->planar.detect1 = 1;
-		}
-		if (iGreen2 < 0 || iGreen2 > 31)
-		{
-			m_pencodingbitsRGB8->planar.detect2 = 1;
-		}
-		if (iBlue2 >= 4)
-		{
-			m_pencodingbitsRGB8->planar.detect3 = 7;
-			m_pencodingbitsRGB8->planar.detect4 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->planar.detect3 = 0;
-			m_pencodingbitsRGB8->planar.detect4 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-			iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
-
-			// make sure red and green don't overflow and blue does
-			assert(iRed2 >= 0 && iRed2 <= 31);
-			assert(iGreen2 >= 0 && iGreen2 <= 31);
-			assert(iBlue2 < 0 || iBlue2 > 31);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the decoded colors and decoded alpha based on the encoding state for T mode
-	//
-	void Block4x4Encoding_RGB8::DecodePixels_T(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			switch (m_auiSelectors[uiPixel])
-			{
-			case 0:
-				m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1;
-				break;
-
-			case 1:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
-				break;
-
-			case 2:
-				m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2;
-				break;
-
-			case 3:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
-				break;
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the decoded colors and decoded alpha based on the encoding state for H mode
-	//
-	void Block4x4Encoding_RGB8::DecodePixels_H(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			switch (m_auiSelectors[uiPixel])
-			{
-			case 0:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB();
-				break;
-
-			case 1:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB();
-				break;
-
-			case 2:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
-				break;
-
-			case 3:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
-				break;
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the decoded colors and decoded alpha based on the encoding state for Planar mode
-	//
-	void Block4x4Encoding_RGB8::DecodePixels_Planar(void)
-	{
-
-		int iRO = (int)roundf(m_frgbaColor1.fR * 255.0f);
-		int iGO = (int)roundf(m_frgbaColor1.fG * 255.0f);
-		int iBO = (int)roundf(m_frgbaColor1.fB * 255.0f);
-
-		int iRH = (int)roundf(m_frgbaColor2.fR * 255.0f);
-		int iGH = (int)roundf(m_frgbaColor2.fG * 255.0f);
-		int iBH = (int)roundf(m_frgbaColor2.fB * 255.0f);
-
-		int iRV = (int)roundf(m_frgbaColor3.fR * 255.0f);
-		int iGV = (int)roundf(m_frgbaColor3.fG * 255.0f);
-		int iBV = (int)roundf(m_frgbaColor3.fB * 255.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			int iX = (int)(uiPixel >> 2);
-			int iY = (int)(uiPixel & 3);
-
-			int iR = (iX*(iRH - iRO) + iY*(iRV - iRO) + 4*iRO + 2) >> 2;
-			int iG = (iX*(iGH - iGO) + iY*(iGV - iGO) + 4*iGO + 2) >> 2;
-			int iB = (iX*(iBH - iBO) + iY*(iBV - iBO) + 4*iBO + 2) >> 2;
-
-			ColorFloatRGBA frgba;
-			frgba.fR = (float)iR / 255.0f;
-			frgba.fG = (float)iG / 255.0f;
-			frgba.fB = (float)iB / 255.0f;
-			frgba.fA = 1.0f;
-
-			m_afrgbaDecodedColors[uiPixel] = frgba.ClampRGB();
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a linear regression for the a_uiPixels in a_pafrgbaPixels[]
-	//
-	// output the closest color line using a_pfrgbaSlope and a_pfrgbaOffset
-	//
-	void Block4x4Encoding_RGB8::ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels,
-												ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset)
-	{
-		typedef struct
-		{
-			float f[4];
-		} Float4;
-
-		Float4 *paf4Pixels = (Float4 *)(a_pafrgbaPixels);
-		Float4 *pf4Slope = (Float4 *)(a_pfrgbaSlope);
-		Float4 *pf4Offset = (Float4 *)(a_pfrgbaOffset);
-
-		float afX[MAX_PLANAR_REGRESSION_SIZE];
-		float afY[MAX_PLANAR_REGRESSION_SIZE];
-
-		// handle r, g and b separately.  don't bother with a
-		for (unsigned int uiComponent = 0; uiComponent < 3; uiComponent++)
-		{
-			for (unsigned int uiPixel = 0; uiPixel < a_uiPixels; uiPixel++)
-			{
-				afX[uiPixel] = (float)uiPixel;
-				afY[uiPixel] = paf4Pixels[uiPixel].f[uiComponent];
-				
-			}
-			Etc::Regression(afX, afY, a_uiPixels,
-				&(pf4Slope->f[uiComponent]), &(pf4Offset->f[uiComponent]));
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.h
deleted file mode 100644
index 03754d5e3b..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_ETC1.h"
-
-namespace Etc
-{
-
-	class Block4x4Encoding_RGB8 : public Block4x4Encoding_ETC1
-	{
-	public:
-
-		Block4x4Encoding_RGB8(void);
-		virtual ~Block4x4Encoding_RGB8(void);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource,
-
-											ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-		
-		virtual void SetEncodingBits(void);
-
-		inline ColorFloatRGBA GetColor3(void) const
-		{
-			return m_frgbaColor3;
-		}
-
-	protected:
-
-		static const unsigned int PLANAR_CORNER_COLORS = 3;
-		static const unsigned int MAX_PLANAR_REGRESSION_SIZE = 4;
-		static const unsigned int TH_DISTANCES = 8;
-
-		static float s_afTHDistanceTable[TH_DISTANCES];
-
-		void TryPlanar(unsigned int a_uiRadius);
-		void TryTAndH(unsigned int a_uiRadius);
-
-		void InitFromEncodingBits_Planar(void);
-
-		ColorFloatRGBA	m_frgbaColor3;		// used for planar
-
-		void SetEncodingBits_T(void);
-		void SetEncodingBits_H(void);
-		void SetEncodingBits_Planar(void);
-
-		// state shared between iterations
-		ColorFloatRGBA	m_frgbaOriginalColor1_TAndH;
-		ColorFloatRGBA	m_frgbaOriginalColor2_TAndH;
-
-		void CalculateBaseColorsForTAndH(void);
-		void TryT(unsigned int a_uiRadius);
-		void TryT_BestSelectorCombination(void);
-		void TryH(unsigned int a_uiRadius);
-		void TryH_BestSelectorCombination(void);
-
-	private:
-
-		void InitFromEncodingBits_T(void);
-		void InitFromEncodingBits_H(void);
-
-		void CalculatePlanarCornerColors(void);
-
-		void ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels,
-			ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset);
-
-		bool TwiddlePlanar(void);
-		bool TwiddlePlanarR();
-		bool TwiddlePlanarG();
-		bool TwiddlePlanarB();
-
-		void DecodePixels_T(void);
-		void DecodePixels_H(void);
-		void DecodePixels_Planar(void);
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
deleted file mode 100644
index b94b64e68c..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
+++ /dev/null
@@ -1,1819 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_RGB8A1.cpp contains:
-	Block4x4Encoding_RGB8A1
-	Block4x4Encoding_RGB8A1_Opaque
-	Block4x4Encoding_RGB8A1_Transparent
-
-These encoders are used when targetting file format RGB8A1.
-
-Block4x4Encoding_RGB8A1_Opaque is used when all pixels in the 4x4 block are opaque
-Block4x4Encoding_RGB8A1_Transparent is used when all pixels in the 4x4 block are transparent
-Block4x4Encoding_RGB8A1 is used when there is a mixture of alphas in the 4x4 block
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_RGB8A1.h"
-
-#include "EtcBlock4x4.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4Encoding_RGB8.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-namespace Etc
-{
-	
-	// ####################################################################################################
-	// Block4x4Encoding_RGB8A1
-	// ####################################################################################################
-
-	float Block4x4Encoding_RGB8A1::s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS] =
-	{
-		{ 0.0f / 255.0f, 8.0f / 255.0f, 0.0f / 255.0f, -8.0f / 255.0f },
-		{ 0.0f / 255.0f, 17.0f / 255.0f, 0.0f / 255.0f, -17.0f / 255.0f },
-		{ 0.0f / 255.0f, 29.0f / 255.0f, 0.0f / 255.0f, -29.0f / 255.0f },
-		{ 0.0f / 255.0f, 42.0f / 255.0f, 0.0f / 255.0f, -42.0f / 255.0f },
-		{ 0.0f / 255.0f, 60.0f / 255.0f, 0.0f / 255.0f, -60.0f / 255.0f },
-		{ 0.0f / 255.0f, 80.0f / 255.0f, 0.0f / 255.0f, -80.0f / 255.0f },
-		{ 0.0f / 255.0f, 106.0f / 255.0f, 0.0f / 255.0f, -106.0f / 255.0f },
-		{ 0.0f / 255.0f, 183.0f / 255.0f, 0.0f / 255.0f, -183.0f / 255.0f }
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_RGB8A1::Block4x4Encoding_RGB8A1(void)
-	{
-		m_pencodingbitsRGB8 = nullptr;
-		m_boolOpaque = false;
-		m_boolTransparent = false;
-		m_boolPunchThroughPixels = true;
-
-	}
-	Block4x4Encoding_RGB8A1::~Block4x4Encoding_RGB8A1(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_RGB8A1::InitFromSource(Block4x4 *a_pblockParent,
-													ColorFloatRGBA *a_pafrgbaSource,
-													unsigned char *a_paucEncodingBits,
-													ErrorMetric a_errormetric)
-	{
-
-		Block4x4Encoding_RGB8::InitFromSource(a_pblockParent,
-			a_pafrgbaSource,
-			a_paucEncodingBits,
-			a_errormetric);
-
-		m_boolOpaque = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE;
-		m_boolTransparent = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT;
-		m_boolPunchThroughPixels = a_pblockParent->HasPunchThroughPixels();
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			if (m_pafrgbaSource[uiPixel].fA >= 0.5f)
-			{
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-			}
-			else
-			{
-				m_afDecodedAlphas[uiPixel] = 0.0f;
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_RGB8A1::InitFromEncodingBits(Block4x4 *a_pblockParent,
-														unsigned char *a_paucEncodingBits,
-														ColorFloatRGBA *a_pafrgbaSource,
-														ErrorMetric a_errormetric)
-	{
-
-
-		InitFromEncodingBits_ETC1(a_pblockParent,
-			a_paucEncodingBits,
-			a_pafrgbaSource,
-			a_errormetric);
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
-
-		// detect if there is a T, H or Planar mode present
-		int iRed1 = m_pencodingbitsRGB8->differential.red1;
-		int iDRed2 = m_pencodingbitsRGB8->differential.dred2;
-		int iRed2 = iRed1 + iDRed2;
-
-		int iGreen1 = m_pencodingbitsRGB8->differential.green1;
-		int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2;
-		int iGreen2 = iGreen1 + iDGreen2;
-
-		int iBlue1 = m_pencodingbitsRGB8->differential.blue1;
-		int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2;
-		int iBlue2 = iBlue1 + iDBlue2;
-
-		if (iRed2 < 0 || iRed2 > 31)
-		{
-			InitFromEncodingBits_T();
-		}
-		else if (iGreen2 < 0 || iGreen2 > 31)
-		{
-			InitFromEncodingBits_H();
-		}
-		else if (iBlue2 < 0 || iBlue2 > 31)
-		{
-			Block4x4Encoding_RGB8::InitFromEncodingBits_Planar();
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding assuming the encoding is an ETC1 mode.
-	// if it isn't an ETC1 mode, this will be overwritten later
-	//
-	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent,
-		unsigned char *a_paucEncodingBits,
-		ColorFloatRGBA *a_pafrgbaSource,
-		ErrorMetric a_errormetric)
-	{
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,
-			a_errormetric);
-
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
-
-		m_mode = MODE_ETC1;
-		m_boolDiff = true;
-		m_boolFlip = m_pencodingbitsRGB8->differential.flip;
-		m_boolOpaque = m_pencodingbitsRGB8->differential.diff;
-
-		int iR2 = m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2;
-		if (iR2 < 0)
-		{
-			iR2 = 0;
-		}
-		else if (iR2 > 31)
-		{
-			iR2 = 31;
-		}
-
-		int iG2 = m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2;
-		if (iG2 < 0)
-		{
-			iG2 = 0;
-		}
-		else if (iG2 > 31)
-		{
-			iG2 = 31;
-		}
-
-		int iB2 = m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2;
-		if (iB2 < 0)
-		{
-			iB2 = 0;
-		}
-		else if (iB2 > 31)
-		{
-			iB2 = 31;
-		}
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, (unsigned char)iB2);
-
-		m_uiCW1 = m_pencodingbitsRGB8->differential.cw1;
-		m_uiCW2 = m_pencodingbitsRGB8->differential.cw2;
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		Decode_ETC1();
-
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if T mode is detected
-	//
-	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_T(void)
-	{
-		m_mode = MODE_T;
-
-		unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) +
-								m_pencodingbitsRGB8->t.red1b);
-		unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1;
-		unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1;
-
-		unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2;
-		unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2;
-		unsigned char ucBlue2 = m_pencodingbitsRGB8->t.blue2;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);
-
-		m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db;
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		DecodePixels_T();
-
-		CalcBlockError();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding if H mode is detected
-	//
-	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_H(void)
-	{
-		m_mode = MODE_H;
-
-		unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1;
-		unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) +
-									m_pencodingbitsRGB8->h.green1b);
-		unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) +
-								(m_pencodingbitsRGB8->h.blue1b << 1) +
-								m_pencodingbitsRGB8->h.blue1c);
-
-		unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2;
-		unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) +
-									m_pencodingbitsRGB8->h.green2b);
-		unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2;
-
-		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
-		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);
-
-		// used to determine the LSB of the CW
-		unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1);
-		unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2);
-
-		m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1);
-		if (uiRGB1 >= uiRGB2)
-		{
-			m_uiCW1++;
-		}
-
-		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
-
-		DecodePixels_H();
-
-		CalcBlockError();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// for ETC1 modes, set the decoded colors and decoded alpha based on the encoding state
-	//
-	void Block4x4Encoding_RGB8A1::Decode_ETC1(void)
-	{
-
-		const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0;
-
-		for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++)
-		{
-			ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2;
-			unsigned int uiCW = uiPixelOrder < 8 ? m_uiCW1 : m_uiCW2;
-
-			unsigned int uiPixel = pauiPixelOrder[uiPixelOrder];
-
-			float fDelta;
-			if (m_boolOpaque)
-				fDelta = Block4x4Encoding_ETC1::s_aafCwTable[uiCW][m_auiSelectors[uiPixel]];
-			else 
-				fDelta = s_aafCwOpaqueUnsetTable[uiCW][m_auiSelectors[uiPixel]];
-
-			if (m_boolOpaque == false && m_auiSelectors[uiPixel] == TRANSPARENT_SELECTOR)
-			{
-				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-				m_afDecodedAlphas[uiPixel] = 0.0f;
-			}
-			else
-			{
-				m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// for T mode, set the decoded colors and decoded alpha based on the encoding state
-	//
-	void Block4x4Encoding_RGB8A1::DecodePixels_T(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			switch (m_auiSelectors[uiPixel])
-			{
-			case 0:
-				m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1;
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-
-			case 1:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-
-			case 2:
-				if (m_boolOpaque == false)
-				{
-					m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-					m_afDecodedAlphas[uiPixel] = 0.0f;
-				}
-				else
-				{
-					m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2;
-					m_afDecodedAlphas[uiPixel] = 1.0f;
-				}
-				break;
-
-			case 3:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// for H mode, set the decoded colors and decoded alpha based on the encoding state
-	//
-	void Block4x4Encoding_RGB8A1::DecodePixels_H(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			switch (m_auiSelectors[uiPixel])
-			{
-			case 0:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-
-			case 1:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-
-			case 2:
-				if (m_boolOpaque == false)
-				{
-					m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-					m_afDecodedAlphas[uiPixel] = 0.0f;
-				}
-				else
-				{
-					m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
-					m_afDecodedAlphas[uiPixel] = 1.0f;
-				}
-				break;
-
-			case 3:
-				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-				break;
-			}
-
-		}
-
-	}
-
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	// RGB8A1 can't use individual mode
-	// RGB8A1 with transparent pixels can't use planar mode
-	//
-	void Block4x4Encoding_RGB8A1::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolOpaque);
-		assert(!m_boolTransparent);
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			PerformFirstIteration();
-			break;
-
-		case 1:
-			TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 2:
-			TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
-			if (a_fEffort <= 39.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 3:
-			Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH();
-			TryT(1);
-			TryH(1);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 4:
-			TryDegenerates1();
-			if (a_fEffort <= 59.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 5:
-			TryDegenerates2();
-			if (a_fEffort <= 69.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 6:
-			TryDegenerates3();
-			if (a_fEffort <= 79.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 7:
-			TryDegenerates4();
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-
-		SetDoneIfPerfect();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best initial encoding to ensure block has a valid encoding
-	//
-	void Block4x4Encoding_RGB8A1::PerformFirstIteration(void)
-	{
-		Block4x4Encoding_ETC1::CalculateMostLikelyFlip();
-
-		m_fError = FLT_MAX;
-
-		TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// mostly copied from ETC1
-	// differences:
-	//		Block4x4Encoding_RGB8A1 encodingTry = *this;
-	//
-	void Block4x4Encoding_RGB8A1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, 
-													int a_iGrayOffset1, int a_iGrayOffset2)
-	{
-
-		ColorFloatRGBA frgbaColor1;
-		ColorFloatRGBA frgbaColor2;
-
-		const unsigned int *pauiPixelMapping1;
-		const unsigned int *pauiPixelMapping2;
-
-		if (a_boolFlip)
-		{
-			frgbaColor1 = m_frgbaSourceAverageTop;
-			frgbaColor2 = m_frgbaSourceAverageBottom;
-
-			pauiPixelMapping1 = s_auiTopPixelMapping;
-			pauiPixelMapping2 = s_auiBottomPixelMapping;
-		}
-		else
-		{
-			frgbaColor1 = m_frgbaSourceAverageLeft;
-			frgbaColor2 = m_frgbaSourceAverageRight;
-
-			pauiPixelMapping1 = s_auiLeftPixelMapping;
-			pauiPixelMapping2 = s_auiRightPixelMapping;
-		}
-
-		DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, 
-								a_uiRadius, a_iGrayOffset1, a_iGrayOffset2);
-
-		Block4x4Encoding_RGB8A1 encodingTry = *this;
-		encodingTry.m_boolFlip = a_boolFlip;
-
-		encodingTry.TryDifferentialHalf(&trys.m_half1);
-		encodingTry.TryDifferentialHalf(&trys.m_half2);
-
-		// find best halves that are within differential range
-		DifferentialTrys::Try *ptryBest1 = nullptr;
-		DifferentialTrys::Try *ptryBest2 = nullptr;
-		encodingTry.m_fError = FLT_MAX;
-
-		// see if the best of each half are in differential range
-		int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed;
-		int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen;
-		int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue;
-		if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3)
-		{
-			ptryBest1 = trys.m_half1.m_ptryBest;
-			ptryBest2 = trys.m_half2.m_ptryBest;
-			encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
-		}
-		else
-		{
-			// else, find the next best halves that are in differential range
-			for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0];
-			ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys];
-				ptry1++)
-			{
-				for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0];
-				ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys];
-					ptry2++)
-				{
-					iDRed = ptry2->m_iRed - ptry1->m_iRed;
-					bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4;
-					iDGreen = ptry2->m_iGreen - ptry1->m_iGreen;
-					bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4;
-					iDBlue = ptry2->m_iBlue - ptry1->m_iBlue;
-					bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4;
-
-					if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta)
-					{
-						float fError = ptry1->m_fError + ptry2->m_fError;
-
-						if (fError < encodingTry.m_fError)
-						{
-							encodingTry.m_fError = fError;
-
-							ptryBest1 = ptry1;
-							ptryBest2 = ptry2;
-						}
-					}
-
-				}
-			}
-			assert(encodingTry.m_fError < FLT_MAX);
-			assert(ptryBest1 != nullptr);
-			assert(ptryBest2 != nullptr);
-		}
-
-		if (encodingTry.m_fError < m_fError)
-		{
-			m_mode = MODE_ETC1;
-			m_boolDiff = true;
-			m_boolFlip = encodingTry.m_boolFlip;
-			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
-			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
-			m_uiCW1 = ptryBest1->m_uiCW;
-			m_uiCW2 = ptryBest2->m_uiCW;
-
-			m_fError = 0.0f;
-			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)
-			{
-				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
-				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
-
-				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
-				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				m_auiSelectors[uiPixel1] = uiSelector1;
-				m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder];
-
-				if (uiSelector1 == TRANSPARENT_SELECTOR)
-				{
-					m_afrgbaDecodedColors[uiPixel1] = ColorFloatRGBA();
-					m_afDecodedAlphas[uiPixel1] = 0.0f;
-				}
-				else
-				{
-					float fDeltaRGB1 = s_aafCwOpaqueUnsetTable[m_uiCW1][uiSelector1];
-					m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
-					m_afDecodedAlphas[uiPixel1] = 1.0f;
-				}
-
-				if (uiSelector2 == TRANSPARENT_SELECTOR)
-				{
-					m_afrgbaDecodedColors[uiPixel2] = ColorFloatRGBA();
-					m_afDecodedAlphas[uiPixel2] = 0.0f;
-				}
-				else
-				{
-					float fDeltaRGB2 = s_aafCwOpaqueUnsetTable[m_uiCW2][uiSelector2];
-					m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
-					m_afDecodedAlphas[uiPixel2] = 1.0f;
-				}
-
-				float fDeltaA1 = m_afDecodedAlphas[uiPixel1] - m_pafrgbaSource[uiPixel1].fA;
-				m_fError += fDeltaA1 * fDeltaA1;
-				float fDeltaA2 = m_afDecodedAlphas[uiPixel2] - m_pafrgbaSource[uiPixel2].fA;
-				m_fError += fDeltaA2 * fDeltaA2;
-			}
-
-			m_fError1 = ptryBest1->m_fError;
-			m_fError2 = ptryBest2->m_fError;
-			m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors;
-			m_fError = m_fError1 + m_fError2;
-
-			// sanity check
-			{
-				int iRed1 = m_frgbaColor1.IntRed(31.0f);
-				int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
-				int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
-
-				int iRed2 = m_frgbaColor2.IntRed(31.0f);
-				int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
-				int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
-
-				iDRed = iRed2 - iRed1;
-				iDGreen = iGreen2 - iGreen1;
-				iDBlue = iBlue2 - iBlue1;
-
-				assert(iDRed >= -4 && iDRed < 4);
-				assert(iDGreen >= -4 && iDGreen < 4);
-				assert(iDBlue >= -4 && iDBlue < 4);
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// mostly copied from ETC1
-	// differences:
-	//		uses s_aafCwOpaqueUnsetTable
-	//		color for selector set to 0,0,0,0
-	//
-	void Block4x4Encoding_RGB8A1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf)
-	{
-
-		a_phalf->m_ptryBest = nullptr;
-		float fBestTryError = FLT_MAX;
-
-		a_phalf->m_uiTrys = 0;
-		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius;
-		iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
-			iRed++)
-		{
-			assert(iRed >= 0 && iRed <= 31);
-
-			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
-			iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
-				iGreen++)
-			{
-				assert(iGreen >= 0 && iGreen <= 31);
-
-				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
-				iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
-					iBlue++)
-				{
-					assert(iBlue >= 0 && iBlue <= 31);
-
-					DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
-					assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]);
-
-					ptry->m_iRed = iRed;
-					ptry->m_iGreen = iGreen;
-					ptry->m_iBlue = iBlue;
-					ptry->m_fError = FLT_MAX;
-					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
-
-					// try each CW
-					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
-					{
-						unsigned int auiPixelSelectors[PIXELS / 2];
-						ColorFloatRGBA	afrgbaDecodedColors[PIXELS / 2];
-						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-							FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-
-						// pre-compute decoded pixels for each selector
-						ColorFloatRGBA afrgbaSelectors[SELECTORS];
-						assert(SELECTORS == 4);
-						afrgbaSelectors[0] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][0]).ClampRGB();
-						afrgbaSelectors[1] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][1]).ClampRGB();
-						afrgbaSelectors[2] = ColorFloatRGBA();
-						afrgbaSelectors[3] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][3]).ClampRGB();
-
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
-							ColorFloatRGBA frgbaDecodedPixel;
-
-							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
-							{
-								if (pfrgbaSourcePixel->fA < 0.5f)
-								{
-									uiSelector = TRANSPARENT_SELECTOR;
-								}
-								else if (uiSelector == TRANSPARENT_SELECTOR)
-								{
-									continue;
-								}
-
-								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
-
-								float fPixelError;
-								
-								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
-																	*pfrgbaSourcePixel);
-
-								if (fPixelError < afPixelErrors[uiPixel])
-								{
-									auiPixelSelectors[uiPixel] = uiSelector;
-									afrgbaDecodedColors[uiPixel] = frgbaDecodedPixel;
-									afPixelErrors[uiPixel] = fPixelError;
-								}
-
-								if (uiSelector == TRANSPARENT_SELECTOR)
-								{
-									break;
-								}
-							}
-						}
-
-						// add up all pixel errors
-						float fCWError = 0.0f;
-						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-						{
-							fCWError += afPixelErrors[uiPixel];
-						}
-
-						// if best CW so far
-						if (fCWError < ptry->m_fError)
-						{
-							ptry->m_uiCW = uiCW;
-							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
-							{
-								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
-							}
-							ptry->m_fError = fCWError;
-						}
-
-					}
-
-					if (ptry->m_fError < fBestTryError)
-					{
-						a_phalf->m_ptryBest = ptry;
-						fBestTryError = ptry->m_fError;
-					}
-
-					assert(ptry->m_fError < FLT_MAX);
-
-					a_phalf->m_uiTrys++;
-				}
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in T mode
-	// save this encoding if it improves the error
-	//
-	// since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently
-	// better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower
-	//
-	void Block4x4Encoding_RGB8A1::TryT(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8A1 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_T;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-			encodingTry.m_fError = FLT_MAX;
-		}
-
-		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
-		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
-		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
-
-		int iMinRed1 = iColor1Red - (int)a_uiRadius;
-		if (iMinRed1 < 0)
-		{
-			iMinRed1 = 0;
-		}
-		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
-		if (iMaxRed1 > 15)
-		{
-			iMaxRed1 = 15;
-		}
-
-		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-		if (iMinGreen1 < 0)
-		{
-			iMinGreen1 = 0;
-		}
-		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
-		if (iMaxGreen1 > 15)
-		{
-			iMaxGreen1 = 15;
-		}
-
-		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-		if (iMinBlue1 < 0)
-		{
-			iMinBlue1 = 0;
-		}
-		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
-		if (iMaxBlue1 > 15)
-		{
-			iMaxBlue1 = 15;
-		}
-
-		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
-		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
-
-		int iMinRed2 = iColor2Red - (int)a_uiRadius;
-		if (iMinRed2 < 0)
-		{
-			iMinRed2 = 0;
-		}
-		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
-		if (iMaxRed2 > 15)
-		{
-			iMaxRed2 = 15;
-		}
-
-		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-		if (iMinGreen2 < 0)
-		{
-			iMinGreen2 = 0;
-		}
-		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
-		if (iMaxGreen2 > 15)
-		{
-			iMaxGreen2 = 15;
-		}
-
-		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-		if (iMinBlue2 < 0)
-		{
-			iMinBlue2 = 0;
-		}
-		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
-		if (iMaxBlue2 > 15)
-		{
-			iMaxBlue2 = 15;
-		}
-
-		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-		{
-			encodingTry.m_uiCW1 = uiDistance;
-
-			// twiddle m_frgbaOriginalColor2_TAndH
-			// twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector
-			//
-			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
-			{
-				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
-				{
-					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
-					{
-						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
-						{
-							if (uiBaseColorSwaps == 0)
-							{
-								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
-								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-							}
-							else
-							{
-								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH;
-							}
-
-							encodingTry.TryT_BestSelectorCombination();
-
-							if (encodingTry.m_fError < m_fError)
-							{
-								m_mode = encodingTry.m_mode;
-								m_boolDiff = encodingTry.m_boolDiff;
-								m_boolFlip = encodingTry.m_boolFlip;
-
-								m_frgbaColor1 = encodingTry.m_frgbaColor1;
-								m_frgbaColor2 = encodingTry.m_frgbaColor2;
-								m_uiCW1 = encodingTry.m_uiCW1;
-
-								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-								{
-									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-								}
-
-								m_fError = encodingTry.m_fError;
-							}
-						}
-					}
-				}
-			}
-
-			// twiddle m_frgbaOriginalColor1_TAndH
-			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
-			{
-				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
-				{
-					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
-					{
-						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
-						{
-							if (uiBaseColorSwaps == 0)
-							{
-								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
-							}
-							else
-							{
-								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH;
-								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-							}
-
-							encodingTry.TryT_BestSelectorCombination();
-
-							if (encodingTry.m_fError < m_fError)
-							{
-								m_mode = encodingTry.m_mode;
-								m_boolDiff = encodingTry.m_boolDiff;
-								m_boolFlip = encodingTry.m_boolFlip;
-
-								m_frgbaColor1 = encodingTry.m_frgbaColor1;
-								m_frgbaColor2 = encodingTry.m_frgbaColor2;
-								m_uiCW1 = encodingTry.m_uiCW1;
-
-								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-								{
-									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-								}
-
-								m_fError = encodingTry.m_fError;
-							}
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best selector combination for TryT
-	// called on an encodingTry
-	//
-	void Block4x4Encoding_RGB8A1::TryT_BestSelectorCombination(void)
-	{
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-
-		unsigned int auiBestPixelSelectors[PIXELS];
-		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-			FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
-		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
-
-		assert(SELECTORS == 4);
-		afrgbaDecodedPixel[0] = m_frgbaColor1;
-		afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[2] = ColorFloatRGBA();
-		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
-
-		// try each selector
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiMinSelector = 0;
-			unsigned int uiMaxSelector = SELECTORS - 1;
-
-			if (m_pafrgbaSource[uiPixel].fA < 0.5f)
-			{
-				uiMinSelector = 2;
-				uiMaxSelector = 2;
-			}
-
-			for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++)
-			{
-				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
-													m_pafrgbaSource[uiPixel]);
-
-				if (fPixelError < afBestPixelErrors[uiPixel])
-				{
-					afBestPixelErrors[uiPixel] = fPixelError;
-					auiBestPixelSelectors[uiPixel] = uiSelector;
-					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
-				}
-			}
-		}
-		
-
-		// add up all of the pixel errors
-		float fBlockError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			fBlockError += afBestPixelErrors[uiPixel];
-		}
-
-		if (fBlockError < m_fError)
-		{
-			m_fError = fBlockError;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
-				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try encoding in H mode
-	// save this encoding if it improves the error
-	//
-	// since all pixels use the distance table, color1 and color2 can NOT be twiddled independently
-	// TWIDDLE_RADIUS of 2 is WAY too slow
-	//
-	void Block4x4Encoding_RGB8A1::TryH(unsigned int a_uiRadius)
-	{
-		Block4x4Encoding_RGB8A1 encodingTry = *this;
-
-		// init "try"
-		{
-			encodingTry.m_mode = MODE_H;
-			encodingTry.m_boolDiff = true;
-			encodingTry.m_boolFlip = false;
-			encodingTry.m_fError = FLT_MAX;
-		}
-
-		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
-		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
-		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
-
-		int iMinRed1 = iColor1Red - (int)a_uiRadius;
-		if (iMinRed1 < 0)
-		{
-			iMinRed1 = 0;
-		}
-		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
-		if (iMaxRed1 > 15)
-		{
-			iMaxRed1 = 15;
-		}
-
-		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-		if (iMinGreen1 < 0)
-		{
-			iMinGreen1 = 0;
-		}
-		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
-		if (iMaxGreen1 > 15)
-		{
-			iMaxGreen1 = 15;
-		}
-
-		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-		if (iMinBlue1 < 0)
-		{
-			iMinBlue1 = 0;
-		}
-		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
-		if (iMaxBlue1 > 15)
-		{
-			iMaxBlue1 = 15;
-		}
-
-		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
-		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
-
-		int iMinRed2 = iColor2Red - (int)a_uiRadius;
-		if (iMinRed2 < 0)
-		{
-			iMinRed2 = 0;
-		}
-		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
-		if (iMaxRed2 > 15)
-		{
-			iMaxRed2 = 15;
-		}
-
-		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-		if (iMinGreen2 < 0)
-		{
-			iMinGreen2 = 0;
-		}
-		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
-		if (iMaxGreen2 > 15)
-		{
-			iMaxGreen2 = 15;
-		}
-
-		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-		if (iMinBlue2 < 0)
-		{
-			iMinBlue2 = 0;
-		}
-		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
-		if (iMaxBlue2 > 15)
-		{
-			iMaxBlue2 = 15;
-		}
-
-		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-		{
-			encodingTry.m_uiCW1 = uiDistance;
-
-			// twiddle m_frgbaOriginalColor1_TAndH
-			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
-			{
-				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
-				{
-					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
-					{
-						encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
-						encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
-
-						// if color1 == color2, H encoding issues can pop up, so abort
-						if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue)
-						{
-							continue;
-						}
-
-						encodingTry.TryH_BestSelectorCombination();
-
-						if (encodingTry.m_fError < m_fError)
-						{
-							m_mode = encodingTry.m_mode;
-							m_boolDiff = encodingTry.m_boolDiff;
-							m_boolFlip = encodingTry.m_boolFlip;
-
-							m_frgbaColor1 = encodingTry.m_frgbaColor1;
-							m_frgbaColor2 = encodingTry.m_frgbaColor2;
-							m_uiCW1 = encodingTry.m_uiCW1;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-							}
-
-							m_fError = encodingTry.m_fError;
-						}
-					}
-				}
-			}
-
-			// twiddle m_frgbaOriginalColor2_TAndH
-			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
-			{
-				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
-				{
-					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
-					{
-						encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
-						encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
-
-						// if color1 == color2, H encoding issues can pop up, so abort
-						if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue)
-						{
-							continue;
-						}
-
-						encodingTry.TryH_BestSelectorCombination();
-
-						if (encodingTry.m_fError < m_fError)
-						{
-							m_mode = encodingTry.m_mode;
-							m_boolDiff = encodingTry.m_boolDiff;
-							m_boolFlip = encodingTry.m_boolFlip;
-
-							m_frgbaColor1 = encodingTry.m_frgbaColor1;
-							m_frgbaColor2 = encodingTry.m_frgbaColor2;
-							m_uiCW1 = encodingTry.m_uiCW1;
-
-							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-							{
-								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
-								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
-							}
-
-							m_fError = encodingTry.m_fError;
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best selector combination for TryH
-	// called on an encodingTry
-	//
-	void Block4x4Encoding_RGB8A1::TryH_BestSelectorCombination(void)
-	{
-
-		// abort if colors and CW will pose an encoding problem
-		{
-			unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(255.0f);
-			unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(255.0f);
-			unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(255.0f);
-			unsigned int uiColorValue1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
-
-			unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(255.0f);
-			unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(255.0f);
-			unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(255.0f);
-			unsigned int uiColorValue2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;
-
-			unsigned int uiCWLsb = m_uiCW1 & 1;
-
-			if ((uiColorValue1 >= (uiColorValue2 & uiCWLsb)) == 0 ||
-				(uiColorValue1 < (uiColorValue2 & uiCWLsb)) == 1)
-			{
-				return;
-			}
-		}
-
-		float fDistance = s_afTHDistanceTable[m_uiCW1];
-
-		unsigned int auiBestPixelSelectors[PIXELS];
-		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
-											FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
-		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
-		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
-
-		assert(SELECTORS == 4);
-		afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB();
-		afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB();
-		afrgbaDecodedPixel[2] = ColorFloatRGBA();;
-		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
-
-
-		// try each selector
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			unsigned int uiMinSelector = 0;
-			unsigned int uiMaxSelector = SELECTORS - 1;
-
-			if (m_pafrgbaSource[uiPixel].fA < 0.5f)
-			{
-				uiMinSelector = 2;
-				uiMaxSelector = 2;
-			}
-
-			for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++)
-			{
-				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
-													m_pafrgbaSource[uiPixel]);
-
-				if (fPixelError < afBestPixelErrors[uiPixel])
-				{
-					afBestPixelErrors[uiPixel] = fPixelError;
-					auiBestPixelSelectors[uiPixel] = uiSelector;
-					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
-				}
-			}
-		}
-		
-
-		// add up all of the pixel errors
-		float fBlockError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			fBlockError += afBestPixelErrors[uiPixel];
-		}
-
-		if (fBlockError < m_fError)
-		{
-			m_fError = fBlockError;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
-				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 1 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_RGB8A1::TryDegenerates1(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, 2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, -2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 2 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_RGB8A1::TryDegenerates2(void)
-	{
-
-		TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2);
-		TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 3 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_RGB8A1::TryDegenerates3(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, -2);
-		TryDifferential(m_boolMostLikelyFlip, 1, -2, 2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, -2);
-		TryDifferential(m_boolMostLikelyFlip, 1, 2, 2);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// try version 4 of the degenerate search
-	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
-	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
-	//		be successfull
-	//
-	void Block4x4Encoding_RGB8A1::TryDegenerates4(void)
-	{
-
-		TryDifferential(m_boolMostLikelyFlip, 1, -4, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 4, 0);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, 4);
-		TryDifferential(m_boolMostLikelyFlip, 1, 0, -4);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGB8A1::SetEncodingBits(void)
-	{
-		switch (m_mode)
-		{
-		case MODE_ETC1:
-			SetEncodingBits_ETC1();
-			break;
-
-		case MODE_T:
-			SetEncodingBits_T();
-			break;
-
-		case MODE_H:
-			SetEncodingBits_H();
-			break;
-
-		case MODE_PLANAR:
-			Block4x4Encoding_RGB8::SetEncodingBits_Planar();
-			break;
-
-		default:
-			assert(false);
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state if ETC1 mode
-	//
-	void Block4x4Encoding_RGB8A1::SetEncodingBits_ETC1(void)
-	{
-
-		// there is no individual mode in RGB8A1
-		assert(m_boolDiff);
-
-		int iRed1 = m_frgbaColor1.IntRed(31.0f);
-		int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
-		int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
-
-		int iRed2 = m_frgbaColor2.IntRed(31.0f);
-		int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
-		int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
-
-		int iDRed2 = iRed2 - iRed1;
-		int iDGreen2 = iGreen2 - iGreen1;
-		int iDBlue2 = iBlue2 - iBlue1;
-
-		assert(iDRed2 >= -4 && iDRed2 < 4);
-		assert(iDGreen2 >= -4 && iDGreen2 < 4);
-		assert(iDBlue2 >= -4 && iDBlue2 < 4);
-
-		m_pencodingbitsRGB8->differential.red1 = iRed1;
-		m_pencodingbitsRGB8->differential.green1 = iGreen1;
-		m_pencodingbitsRGB8->differential.blue1 = iBlue1;
-
-		m_pencodingbitsRGB8->differential.dred2 = iDRed2;
-		m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2;
-		m_pencodingbitsRGB8->differential.dblue2 = iDBlue2;
-
-		m_pencodingbitsRGB8->individual.cw1 = m_uiCW1;
-		m_pencodingbitsRGB8->individual.cw2 = m_uiCW2;
-
-		SetEncodingBits_Selectors();
-
-		// in RGB8A1 encoding bits, opaque replaces differential
-		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
-
-		m_pencodingbitsRGB8->individual.flip = m_boolFlip;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state if T mode
-	//
-	void Block4x4Encoding_RGB8A1::SetEncodingBits_T(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_T);
-		assert(m_boolDiff == true);
-
-		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-
-		m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2;
-		m_pencodingbitsRGB8->t.red1b = uiRed1;
-		m_pencodingbitsRGB8->t.green1 = uiGreen1;
-		m_pencodingbitsRGB8->t.blue1 = uiBlue1;
-
-		m_pencodingbitsRGB8->t.red2 = uiRed2;
-		m_pencodingbitsRGB8->t.green2 = uiGreen2;
-		m_pencodingbitsRGB8->t.blue2 = uiBlue2;
-
-		m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1;
-		m_pencodingbitsRGB8->t.db = m_uiCW1;
-
-		// in RGB8A1 encoding bits, opaque replaces differential
-		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
-
-		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
-
-		// create an invalid R differential to trigger T mode
-		m_pencodingbitsRGB8->t.detect1 = 0;
-		m_pencodingbitsRGB8->t.detect2 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		if (iRed2 >= 4)
-		{
-			m_pencodingbitsRGB8->t.detect1 = 7;
-			m_pencodingbitsRGB8->t.detect2 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->t.detect1 = 0;
-			m_pencodingbitsRGB8->t.detect2 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-
-			// make sure red overflows
-			assert(iRed2 < 0 || iRed2 > 31);
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state if H mode
-	//
-	// colors and selectors may need to swap in order to generate lsb of distance index
-	//
-	void Block4x4Encoding_RGB8A1::SetEncodingBits_H(void)
-	{
-		static const bool SANITY_CHECK = true;
-
-		assert(m_mode == MODE_H);
-		assert(m_boolDiff == true);
-
-		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
-		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
-		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
-
-		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
-		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
-		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
-
-		unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
-		unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;
-
-		bool boolOddDistance = m_uiCW1 & 1;
-		bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance;
-
-		if (boolSwapColors)
-		{
-			m_pencodingbitsRGB8->h.red1 = uiRed2;
-			m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1;
-			m_pencodingbitsRGB8->h.green1b = uiGreen2;
-			m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3;
-			m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1;
-			m_pencodingbitsRGB8->h.blue1c = uiBlue2;
-
-			m_pencodingbitsRGB8->h.red2 = uiRed1;
-			m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1;
-			m_pencodingbitsRGB8->h.green2b = uiGreen1;
-			m_pencodingbitsRGB8->h.blue2 = uiBlue1;
-
-			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
-			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->h.red1 = uiRed1;
-			m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1;
-			m_pencodingbitsRGB8->h.green1b = uiGreen1;
-			m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3;
-			m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1;
-			m_pencodingbitsRGB8->h.blue1c = uiBlue1;
-
-			m_pencodingbitsRGB8->h.red2 = uiRed2;
-			m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1;
-			m_pencodingbitsRGB8->h.green2b = uiGreen2;
-			m_pencodingbitsRGB8->h.blue2 = uiBlue2;
-
-			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
-			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
-		}
-
-		// in RGB8A1 encoding bits, opaque replaces differential
-		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
-
-		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
-
-		if (boolSwapColors)
-		{
-			m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF;
-		}
-
-		// create an invalid R differential to trigger T mode
-		m_pencodingbitsRGB8->h.detect1 = 0;
-		m_pencodingbitsRGB8->h.detect2 = 0;
-		m_pencodingbitsRGB8->h.detect3 = 0;
-		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-		if (iRed2 < 0 || iRed2 > 31)
-		{
-			m_pencodingbitsRGB8->h.detect1 = 1;
-		}
-		if (iGreen2 >= 4)
-		{
-			m_pencodingbitsRGB8->h.detect2 = 7;
-			m_pencodingbitsRGB8->h.detect3 = 0;
-		}
-		else
-		{
-			m_pencodingbitsRGB8->h.detect2 = 0;
-			m_pencodingbitsRGB8->h.detect3 = 1;
-		}
-
-		if (SANITY_CHECK)
-		{
-			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
-			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
-
-			// make sure red doesn't overflow and green does
-			assert(iRed2 >= 0 && iRed2 <= 31);
-			assert(iGreen2 < 0 || iGreen2 > 31);
-		}
-
-	}
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGB8A1_Opaque
-	// ####################################################################################################
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGB8A1_Opaque::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolPunchThroughPixels);
-		assert(!m_boolTransparent);
-		assert(!m_boolDone);
-
-		switch (m_uiEncodingIterations)
-		{
-		case 0:
-			PerformFirstIteration();
-			break;
-
-		case 1:
-			Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 2:
-			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
-			break;
-
-		case 3:
-			Block4x4Encoding_RGB8::TryPlanar(1);
-			break;
-
-		case 4:
-			Block4x4Encoding_RGB8::TryTAndH(1);
-			if (a_fEffort <= 49.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 5:
-			Block4x4Encoding_ETC1::TryDegenerates1();
-			if (a_fEffort <= 59.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 6:
-			Block4x4Encoding_ETC1::TryDegenerates2();
-			if (a_fEffort <= 69.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 7:
-			Block4x4Encoding_ETC1::TryDegenerates3();
-			if (a_fEffort <= 79.5f)
-			{
-				m_boolDone = true;
-			}
-			break;
-
-		case 8:
-			Block4x4Encoding_ETC1::TryDegenerates4();
-			m_boolDone = true;
-			break;
-
-		default:
-			assert(0);
-			break;
-		}
-
-		m_uiEncodingIterations++;
-		SetDoneIfPerfect();
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find best initial encoding to ensure block has a valid encoding
-	//
-	void Block4x4Encoding_RGB8A1_Opaque::PerformFirstIteration(void)
-	{
-		
-		// set decoded alphas
-		// calculate alpha error
-		m_fError = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afDecodedAlphas[uiPixel] = 1.0f;
-
-			float fDeltaA = 1.0f - m_pafrgbaSource[uiPixel].fA;
-			m_fError += fDeltaA * fDeltaA;
-		}
-
-		CalculateMostLikelyFlip();
-
-		m_fError = FLT_MAX;
-
-		Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		Block4x4Encoding_RGB8::TryPlanar(0);
-		SetDoneIfPerfect();
-		if (m_boolDone)
-		{
-			return;
-		}
-		Block4x4Encoding_RGB8::TryTAndH(0);
-		SetDoneIfPerfect();
-	}
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGB8A1_Transparent
-	// ####################################################################################################
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGB8A1_Transparent::PerformIteration(float )
-	{
-		assert(!m_boolOpaque);
-		assert(m_boolTransparent);
-		assert(!m_boolDone);
-		assert(m_uiEncodingIterations == 0);
-
-		m_mode = MODE_ETC1;
-		m_boolDiff = true;
-		m_boolFlip = false;
-
-		m_uiCW1 = 0;
-		m_uiCW2 = 0;
-
-		m_frgbaColor1 = ColorFloatRGBA();
-		m_frgbaColor2 = ColorFloatRGBA();
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_auiSelectors[uiPixel] = TRANSPARENT_SELECTOR;
-
-			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-			m_afDecodedAlphas[uiPixel] = 0.0f;
-		}
-
-		CalcBlockError();
-
-		m_boolDone = true;
-		m_uiEncodingIterations++;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.h
deleted file mode 100644
index ff26e462f8..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_RGB8.h"
-#include "EtcErrorMetric.h"
-#include "EtcBlock4x4EncodingBits.h"
-
-namespace Etc
-{
-
-	// ################################################################################
-	// Block4x4Encoding_RGB8A1
-	// RGB8A1 if not completely opaque or transparent
-	// ################################################################################
-
-	class Block4x4Encoding_RGB8A1 : public Block4x4Encoding_RGB8
-	{
-	public:
-
-		static const unsigned int TRANSPARENT_SELECTOR = 2;
-
-		Block4x4Encoding_RGB8A1(void);
-		virtual ~Block4x4Encoding_RGB8A1(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-									ColorFloatRGBA *a_pafrgbaSource,
-									unsigned char *a_paucEncodingBits,
-									ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource,
-											ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-		void InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent,
-										unsigned char *a_paucEncodingBits,
-										ColorFloatRGBA *a_pafrgbaSource,
-										ErrorMetric a_errormetric);
-
-		void InitFromEncodingBits_T(void);
-		void InitFromEncodingBits_H(void);
-
-		void PerformFirstIteration(void);
-
-		void Decode_ETC1(void);
-		void DecodePixels_T(void);
-		void DecodePixels_H(void);
-		void SetEncodingBits_ETC1(void);
-		void SetEncodingBits_T(void);
-		void SetEncodingBits_H(void);
-
-	protected:
-
-		bool m_boolOpaque;				// all source pixels have alpha >= 0.5
-		bool m_boolTransparent;			// all source pixels have alpha < 0.5
-		bool m_boolPunchThroughPixels;	// some source pixels have alpha < 0.5
-
-		static float s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS];
-
-	private:
-
-		void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
-								int a_iGrayOffset1, int a_iGrayOffset2);
-		void TryDifferentialHalf(DifferentialTrys::Half *a_phalf);
-
-		void TryT(unsigned int a_uiRadius);
-		void TryT_BestSelectorCombination(void);
-		void TryH(unsigned int a_uiRadius);
-		void TryH_BestSelectorCombination(void);
-
-		void TryDegenerates1(void);
-		void TryDegenerates2(void);
-		void TryDegenerates3(void);
-		void TryDegenerates4(void);
-
-	};
-
-	// ################################################################################
-	// Block4x4Encoding_RGB8A1_Opaque
-	// RGB8A1 if all pixels have alpha==1
-	// ################################################################################
-
-	class Block4x4Encoding_RGB8A1_Opaque : public Block4x4Encoding_RGB8A1
-	{
-	public:
-
-		virtual void PerformIteration(float a_fEffort);
-
-		void PerformFirstIteration(void);
-
-	private:
-
-	};
-
-	// ################################################################################
-	// Block4x4Encoding_RGB8A1_Transparent
-	// RGB8A1 if all pixels have alpha==0
-	// ################################################################################
-
-	class Block4x4Encoding_RGB8A1_Transparent : public Block4x4Encoding_RGB8A1
-	{
-	public:
-
-		virtual void PerformIteration(float a_fEffort);
-
-	private:
-
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp
deleted file mode 100644
index 600c7ab405..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp
+++ /dev/null
@@ -1,474 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcBlock4x4Encoding_RGBA8.cpp contains:
-	Block4x4Encoding_RGBA8
-	Block4x4Encoding_RGBA8_Opaque
-	Block4x4Encoding_RGBA8_Transparent
-
-These encoders are used when targetting file format RGBA8.
-
-Block4x4Encoding_RGBA8_Opaque is used when all pixels in the 4x4 block are opaque
-Block4x4Encoding_RGBA8_Transparent is used when all pixels in the 4x4 block are transparent
-Block4x4Encoding_RGBA8 is used when there is a mixture of alphas in the 4x4 block
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcBlock4x4Encoding_RGBA8.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <float.h>
-#include <limits>
-
-namespace Etc
-{
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGBA8
-	// ####################################################################################################
-
-	float Block4x4Encoding_RGBA8::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS]
-	{
-		{ -3.0f / 255.0f, -6.0f / 255.0f,  -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f },
-		{ -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 12.0f / 255.0f },
-		{ -2.0f / 255.0f, -4.0f / 255.0f,  -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f },
-
-		{ -3.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f },
-		{ -3.0f / 255.0f, -7.0f / 255.0f,  -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f },
-		{ -4.0f / 255.0f, -7.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
-		{ -3.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
-
-		{ -2.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -4.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
-		{ -2.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
-
-		{ -3.0f / 255.0f, -4.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
-		{ -1.0f / 255.0f, -2.0f / 255.0f,  -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f,  9.0f / 255.0f },
-		{ -4.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f,  -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  8.0f / 255.0f },
-		{ -3.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f,  -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  8.0f / 255.0f }
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Block4x4Encoding_RGBA8::Block4x4Encoding_RGBA8(void)
-	{
-
-		m_pencodingbitsA8 = nullptr;
-
-	}
-	Block4x4Encoding_RGBA8::~Block4x4Encoding_RGBA8(void) {}
-	// ----------------------------------------------------------------------------------------------------
-	// initialization prior to encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits
-	//
-	void Block4x4Encoding_RGBA8::InitFromSource(Block4x4 *a_pblockParent,
-												ColorFloatRGBA *a_pafrgbaSource,
-												unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
-	{
-		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric);
-
-		m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits;
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8));
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// initialization from the encoding bits of a previous encoding
-	// a_pblockParent points to the block associated with this encoding
-	// a_errormetric is used to choose the best encoding
-	// a_pafrgbaSource points to a 4x4 block subset of the source image
-	// a_paucEncodingBits points to the final encoding bits of a previous encoding
-	//
-	void Block4x4Encoding_RGBA8::InitFromEncodingBits(Block4x4 *a_pblockParent,
-														unsigned char *a_paucEncodingBits,
-														ColorFloatRGBA *a_pafrgbaSource,
-														ErrorMetric a_errormetric)
-	{
-
-		m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits;
-		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8));
-
-		// init RGB portion
-		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
-													(unsigned char *) m_pencodingbitsRGB8,
-													a_pafrgbaSource,
-													a_errormetric);
-
-		// init A8 portion
-		// has to be done after InitFromEncodingBits()
-		{
-			m_fBase = m_pencodingbitsA8->data.base / 255.0f;
-			m_fMultiplier = (float)m_pencodingbitsA8->data.multiplier;
-			m_uiModifierTableIndex = m_pencodingbitsA8->data.table;
-
-			unsigned long long int ulliSelectorBits = 0;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors0 << 40;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors1 << 32;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors2 << 24;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors3 << 16;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors4 << 8;
-			ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors5;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				unsigned int uiShift = 45 - (3 * uiPixel);
-				m_auiAlphaSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (ALPHA_SELECTORS - 1);
-			}
-
-			// decode the alphas
-			// calc alpha error
-			m_fError = 0.0f;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_afDecodedAlphas[uiPixel] = DecodePixelAlpha(m_fBase, m_fMultiplier,
-					m_uiModifierTableIndex,
-					m_auiAlphaSelectors[uiPixel]);
-
-				float fDeltaAlpha = m_afDecodedAlphas[uiPixel] - m_pafrgbaSource[uiPixel].fA;
-				m_fError += fDeltaAlpha * fDeltaAlpha;
-			}
-		}
-
-		// redo error calc to include alpha
-		CalcBlockError();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	// similar to Block4x4Encoding_RGB8_Base::Encode_RGB8(), but with alpha added
-	//
-	void Block4x4Encoding_RGBA8::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		if (m_uiEncodingIterations == 0)
-		{
-			if (a_fEffort < 24.9f)
-			{
-				CalculateA8(0.0f);
-			}
-			else if (a_fEffort < 49.9f)
-			{
-				CalculateA8(1.0f);
-			}
-			else
-			{
-				CalculateA8(2.0f);
-			}
-		}
-
-		Block4x4Encoding_RGB8::PerformIteration(a_fEffort);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// find the best combination of base alpga, multiplier and selectors
-	//
-	// a_fRadius limits the range of base alpha to try
-	//
-	void Block4x4Encoding_RGBA8::CalculateA8(float a_fRadius)
-	{
-
-		// find min/max alpha
-		float fMinAlpha = 1.0f;
-		float fMaxAlpha = 0.0f;
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			float fAlpha = m_pafrgbaSource[uiPixel].fA;
-
-			// ignore border pixels
-			if (isnan(fAlpha))
-			{
-				continue;
-			}
-
-			if (fAlpha < fMinAlpha)
-			{
-				fMinAlpha = fAlpha;
-			}
-			if (fAlpha > fMaxAlpha)
-			{
-				fMaxAlpha = fAlpha;
-			}
-		}
-		assert(fMinAlpha <= fMaxAlpha);
-
-		float fAlphaRange = fMaxAlpha - fMinAlpha;
-
-		// try each modifier table entry
-		m_fError = FLT_MAX;		// artificially high value
-		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
-		{
-			static const unsigned int MIN_VALUE_SELECTOR = 3;
-			static const unsigned int MAX_VALUE_SELECTOR = 7;
-
-			float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR];
-
-			float fTableEntryRange = s_aafModifierTable[uiTableEntry][MAX_VALUE_SELECTOR] -
-				s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR];
-
-			float fCenterRatio = fTableEntryCenter / fTableEntryRange;
-
-			float fCenter = fMinAlpha + fCenterRatio*fAlphaRange;
-			fCenter = roundf(255.0f * fCenter) / 255.0f;
-
-			float fMinBase = fCenter - (a_fRadius / 255.0f);
-			if (fMinBase < 0.0f)
-			{
-				fMinBase = 0.0f;
-			}
-
-			float fMaxBase = fCenter + (a_fRadius / 255.0f);
-			if (fMaxBase > 1.0f)
-			{
-				fMaxBase = 1.0f;
-			}
-
-			for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
-			{
-
-				float fRangeMultiplier = roundf(fAlphaRange / fTableEntryRange);
-
-				float fMinMultiplier = fRangeMultiplier - a_fRadius;
-				if (fMinMultiplier < 1.0f)
-				{
-					fMinMultiplier = 1.0f;
-				}
-				else if (fMinMultiplier > 15.0f)
-				{
-					fMinMultiplier = 15.0f;
-				}
-
-				float fMaxMultiplier = fRangeMultiplier + a_fRadius;
-				if (fMaxMultiplier < 1.0f)
-				{
-					fMaxMultiplier = 1.0f;
-				}
-				else if (fMaxMultiplier > 15.0f)
-				{
-					fMaxMultiplier = 15.0f;
-				}
-
-				for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
-				{
-					// find best selector for each pixel
-					unsigned int auiBestSelectors[PIXELS];
-					float afBestAlphaError[PIXELS];
-					float afBestDecodedAlphas[PIXELS];
-					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-					{
-						float fBestPixelAlphaError = FLT_MAX;
-						for (unsigned int uiSelector = 0; uiSelector < ALPHA_SELECTORS; uiSelector++)
-						{
-							float fDecodedAlpha = DecodePixelAlpha(fBase, fMultiplier, uiTableEntry, uiSelector);
-
-							// border pixels (NAN) should have zero error
-							float fPixelDeltaAlpha = isnan(m_pafrgbaSource[uiPixel].fA) ?
-															0.0f :
-															fDecodedAlpha - m_pafrgbaSource[uiPixel].fA;
-
-							float fPixelAlphaError = fPixelDeltaAlpha * fPixelDeltaAlpha;
-
-							if (fPixelAlphaError < fBestPixelAlphaError)
-							{
-								fBestPixelAlphaError = fPixelAlphaError;
-								auiBestSelectors[uiPixel] = uiSelector;
-								afBestAlphaError[uiPixel] = fBestPixelAlphaError;
-								afBestDecodedAlphas[uiPixel] = fDecodedAlpha;
-							}
-						}
-					}
-
-					float fBlockError = 0.0f;
-					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-					{
-						fBlockError += afBestAlphaError[uiPixel];
-					}
-
-					if (fBlockError < m_fError)
-					{
-						m_fError = fBlockError;
-
-						m_fBase = fBase;
-						m_fMultiplier = fMultiplier;
-						m_uiModifierTableIndex = uiTableEntry;
-						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-						{
-							m_auiAlphaSelectors[uiPixel] = auiBestSelectors[uiPixel];
-							m_afDecodedAlphas[uiPixel] = afBestDecodedAlphas[uiPixel];
-						}
-					}
-				}
-			}
-
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGBA8::SetEncodingBits(void)
-	{
-
-		// set the RGB8 portion
-		Block4x4Encoding_RGB8::SetEncodingBits();
-
-		// set the A8 portion
-		{
-			m_pencodingbitsA8->data.base = (unsigned char)roundf(255.0f * m_fBase);
-			m_pencodingbitsA8->data.table = m_uiModifierTableIndex;
-			m_pencodingbitsA8->data.multiplier = (unsigned char)roundf(m_fMultiplier);
-
-			unsigned long long int ulliSelectorBits = 0;
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				unsigned int uiShift = 45 - (3 * uiPixel);
-				ulliSelectorBits |= ((unsigned long long int)m_auiAlphaSelectors[uiPixel]) << uiShift;
-			}
-
-			m_pencodingbitsA8->data.selectors0 = ulliSelectorBits >> 40;
-			m_pencodingbitsA8->data.selectors1 = ulliSelectorBits >> 32;
-			m_pencodingbitsA8->data.selectors2 = ulliSelectorBits >> 24;
-			m_pencodingbitsA8->data.selectors3 = ulliSelectorBits >> 16;
-			m_pencodingbitsA8->data.selectors4 = ulliSelectorBits >> 8;
-			m_pencodingbitsA8->data.selectors5 = ulliSelectorBits;
-		}
-
-	}
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGBA8_Opaque
-	// ####################################################################################################
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGBA8_Opaque::PerformIteration(float a_fEffort)
-	{
-		assert(!m_boolDone);
-
-		if (m_uiEncodingIterations == 0)
-		{
-			m_fError = 0.0f;
-
-			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-			{
-				m_afDecodedAlphas[uiPixel] = 1.0f;
-			}
-		}
-
-		Block4x4Encoding_RGB8::PerformIteration(a_fEffort);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGBA8_Opaque::SetEncodingBits(void)
-	{
-
-		// set the RGB8 portion
-		Block4x4Encoding_RGB8::SetEncodingBits();
-
-		// set the A8 portion
-		m_pencodingbitsA8->data.base = 255;
-		m_pencodingbitsA8->data.table = 15;
-		m_pencodingbitsA8->data.multiplier = 15;
-		m_pencodingbitsA8->data.selectors0 = 0xFF;
-		m_pencodingbitsA8->data.selectors1 = 0xFF;
-		m_pencodingbitsA8->data.selectors2 = 0xFF;
-		m_pencodingbitsA8->data.selectors3 = 0xFF;
-		m_pencodingbitsA8->data.selectors4 = 0xFF;
-		m_pencodingbitsA8->data.selectors5 = 0xFF;
-
-	}
-
-	// ####################################################################################################
-	// Block4x4Encoding_RGBA8_Transparent
-	// ####################################################################################################
-
-	// ----------------------------------------------------------------------------------------------------
-	// perform a single encoding iteration
-	// replace the encoding if a better encoding was found
-	// subsequent iterations generally take longer for each iteration
-	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
-	//
-	void Block4x4Encoding_RGBA8_Transparent::PerformIteration(float )
-	{
-		assert(!m_boolDone);
-		assert(m_uiEncodingIterations == 0);
-
-		m_mode = MODE_ETC1;
-		m_boolDiff = true;
-		m_boolFlip = false;
-
-		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
-		{
-			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
-			m_afDecodedAlphas[uiPixel] = 0.0f;
-		}
-
-		m_fError = 0.0f;
-
-		m_boolDone = true;
-		m_uiEncodingIterations++;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// set the encoding bits based on encoding state
-	//
-	void Block4x4Encoding_RGBA8_Transparent::SetEncodingBits(void)
-	{
-
-		Block4x4Encoding_RGB8::SetEncodingBits();
-
-		// set the A8 portion
-		m_pencodingbitsA8->data.base = 0;
-		m_pencodingbitsA8->data.table = 0;
-		m_pencodingbitsA8->data.multiplier = 1;
-		m_pencodingbitsA8->data.selectors0 = 0;
-		m_pencodingbitsA8->data.selectors1 = 0;
-		m_pencodingbitsA8->data.selectors2 = 0;
-		m_pencodingbitsA8->data.selectors3 = 0;
-		m_pencodingbitsA8->data.selectors4 = 0;
-		m_pencodingbitsA8->data.selectors5 = 0;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-}
diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.h
deleted file mode 100644
index 5765d36b90..0000000000
--- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcBlock4x4Encoding_RGB8.h"
-
-namespace Etc
-{
-	class Block4x4EncodingBits_A8;
-
-	// ################################################################################
-	// Block4x4Encoding_RGBA8
-	// RGBA8 if not completely opaque or transparent
-	// ################################################################################
-
-	class Block4x4Encoding_RGBA8 : public Block4x4Encoding_RGB8
-	{
-	public:
-
-		Block4x4Encoding_RGBA8(void);
-		virtual ~Block4x4Encoding_RGBA8(void);
-
-		virtual void InitFromSource(Block4x4 *a_pblockParent,
-									ColorFloatRGBA *a_pafrgbaSource,
-									unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
-
-		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
-											unsigned char *a_paucEncodingBits,
-											ColorFloatRGBA *a_pafrgbaSource,
-											ErrorMetric a_errormetric);
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-	protected:
-
-		static const unsigned int MODIFIER_TABLE_ENTRYS = 16;
-		static const unsigned int ALPHA_SELECTOR_BITS = 3;
-		static const unsigned int ALPHA_SELECTORS = 1 << ALPHA_SELECTOR_BITS;
-
-		static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS];
-
-		void CalculateA8(float a_fRadius);
-
-		Block4x4EncodingBits_A8 *m_pencodingbitsA8;	// A8 portion of Block4x4EncodingBits_RGBA8
-
-		float m_fBase;
-		float m_fMultiplier;
-		unsigned int m_uiModifierTableIndex;
-		unsigned int m_auiAlphaSelectors[PIXELS];
-
-	private:
-
-		inline float DecodePixelAlpha(float a_fBase, float a_fMultiplier,
-										unsigned int a_uiTableIndex, unsigned int a_uiSelector)
-		{
-			float fPixelAlpha = a_fBase + 
-								a_fMultiplier*s_aafModifierTable[a_uiTableIndex][a_uiSelector];
-			if (fPixelAlpha < 0.0f)
-			{
-				fPixelAlpha = 0.0f;
-			}
-			else if (fPixelAlpha > 1.0f)
-			{
-				fPixelAlpha = 1.0f;
-			}
-
-			return fPixelAlpha;
-		}
-
-	};
-
-	// ################################################################################
-	// Block4x4Encoding_RGBA8_Opaque
-	// RGBA8 if all pixels have alpha==1
-	// ################################################################################
-
-	class Block4x4Encoding_RGBA8_Opaque : public Block4x4Encoding_RGBA8
-	{
-	public:
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-	};
-
-	// ################################################################################
-	// Block4x4Encoding_RGBA8_Transparent
-	// RGBA8 if all pixels have alpha==0
-	// ################################################################################
-
-	class Block4x4Encoding_RGBA8_Transparent : public Block4x4Encoding_RGBA8
-	{
-	public:
-
-		virtual void PerformIteration(float a_fEffort);
-
-		virtual void SetEncodingBits(void);
-
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcColor.h b/thirdparty/etc2comp/EtcColor.h
deleted file mode 100644
index 7ceae05b65..0000000000
--- a/thirdparty/etc2comp/EtcColor.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <math.h>
-
-namespace Etc
-{
-
-	inline float LogToLinear(float a_fLog)
-	{
-		static const float ALPHA = 0.055f;
-		static const float ONE_PLUS_ALPHA = 1.0f + ALPHA;
-
-		if (a_fLog <= 0.04045f)
-		{
-			return a_fLog / 12.92f;
-		}
-		else
-		{
-			return powf((a_fLog + ALPHA) / ONE_PLUS_ALPHA, 2.4f);
-		}
-	}
-
-	inline float LinearToLog(float &a_fLinear)
-	{
-		static const float ALPHA = 0.055f;
-		static const float ONE_PLUS_ALPHA = 1.0f + ALPHA;
-
-		if (a_fLinear <= 0.0031308f)
-		{
-			return 12.92f * a_fLinear;
-		}
-		else
-		{
-			return ONE_PLUS_ALPHA * powf(a_fLinear, (1.0f/2.4f)) - ALPHA;
-		}
-	}
-
-	class ColorR8G8B8A8
-	{
-	public:
-
-		unsigned char ucR;
-		unsigned char ucG;
-		unsigned char ucB;
-		unsigned char ucA;
-
-	};
-}
diff --git a/thirdparty/etc2comp/EtcColorFloatRGBA.h b/thirdparty/etc2comp/EtcColorFloatRGBA.h
deleted file mode 100644
index f2ca2c1f71..0000000000
--- a/thirdparty/etc2comp/EtcColorFloatRGBA.h
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcConfig.h"
-#include "EtcColor.h"
-
-#include <math.h>
-
-namespace Etc
-{
-
-	class ColorFloatRGBA
-    {
-    public:
-
-		ColorFloatRGBA(void)
-        {
-            fR = fG = fB = fA = 0.0f;
-        }
-
-		ColorFloatRGBA(float a_fR, float a_fG, float a_fB, float a_fA)
-        {
-            fR = a_fR;
-            fG = a_fG;
-            fB = a_fB;
-            fA = a_fA;
-        }
-
-		inline ColorFloatRGBA operator+(ColorFloatRGBA& a_rfrgba)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR + a_rfrgba.fR;
-			frgba.fG = fG + a_rfrgba.fG;
-			frgba.fB = fB + a_rfrgba.fB;
-			frgba.fA = fA + a_rfrgba.fA;
-			return frgba;
-		}
-
-		inline ColorFloatRGBA operator+(float a_f)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR + a_f;
-			frgba.fG = fG + a_f;
-			frgba.fB = fB + a_f;
-			frgba.fA = fA;
-			return frgba;
-		}
-
-		inline ColorFloatRGBA operator-(float a_f)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR - a_f;
-			frgba.fG = fG - a_f;
-			frgba.fB = fB - a_f;
-			frgba.fA = fA;
-			return frgba;
-		}
-
-		inline ColorFloatRGBA operator-(ColorFloatRGBA& a_rfrgba)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR - a_rfrgba.fR;
-			frgba.fG = fG - a_rfrgba.fG;
-			frgba.fB = fB - a_rfrgba.fB;
-			frgba.fA = fA - a_rfrgba.fA;
-			return frgba;
-		}
-
-		inline ColorFloatRGBA operator*(float a_f)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = fR * a_f;
-			frgba.fG = fG * a_f;
-			frgba.fB = fB * a_f;
-			frgba.fA = fA;
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA ScaleRGB(float a_f)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = a_f * fR;
-			frgba.fG = a_f * fG;
-			frgba.fB = a_f * fB;
-			frgba.fA = fA;
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA RoundRGB(void)
-		{
-			ColorFloatRGBA frgba;
-			frgba.fR = roundf(fR);
-			frgba.fG = roundf(fG);
-			frgba.fB = roundf(fB);
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA ToLinear()
-		{
-			ColorFloatRGBA frgbaLinear;
-			frgbaLinear.fR = LogToLinear(fR);
-			frgbaLinear.fG = LogToLinear(fG);
-			frgbaLinear.fB = LogToLinear(fB);
-			frgbaLinear.fA = fA;
-
-			return frgbaLinear;
-		}
-
-		inline ColorFloatRGBA ToLog(void)
-		{
-			ColorFloatRGBA frgbaLog;
-			frgbaLog.fR = LinearToLog(fR);
-			frgbaLog.fG = LinearToLog(fG);
-			frgbaLog.fB = LinearToLog(fB);
-			frgbaLog.fA = fA;
-
-			return frgbaLog;
-		}
-
-		inline static ColorFloatRGBA ConvertFromRGBA8(unsigned char a_ucR, 
-			unsigned char a_ucG, unsigned char a_ucB, unsigned char a_ucA)
-		{
-			ColorFloatRGBA frgba;
-
-			frgba.fR = (float)a_ucR / 255.0f;
-			frgba.fG = (float)a_ucG / 255.0f;
-			frgba.fB = (float)a_ucB / 255.0f;
-			frgba.fA = (float)a_ucA / 255.0f;
-
-			return frgba;
-		}
-
-		inline static ColorFloatRGBA ConvertFromRGB4(unsigned char a_ucR4,
-														unsigned char a_ucG4,
-														unsigned char a_ucB4)
-		{
-			ColorFloatRGBA frgba;
-
-			unsigned char ucR8 = (unsigned char)((a_ucR4 << 4) + a_ucR4);
-			unsigned char ucG8 = (unsigned char)((a_ucG4 << 4) + a_ucG4);
-			unsigned char ucB8 = (unsigned char)((a_ucB4 << 4) + a_ucB4);
-
-			frgba.fR = (float)ucR8 / 255.0f;
-			frgba.fG = (float)ucG8 / 255.0f;
-			frgba.fB = (float)ucB8 / 255.0f;
-			frgba.fA = 1.0f;
-
-			return frgba;
-		}
-
-		inline static ColorFloatRGBA ConvertFromRGB5(unsigned char a_ucR5,
-			unsigned char a_ucG5,
-			unsigned char a_ucB5)
-		{
-			ColorFloatRGBA frgba;
-
-			unsigned char ucR8 = (unsigned char)((a_ucR5 << 3) + (a_ucR5 >> 2));
-			unsigned char ucG8 = (unsigned char)((a_ucG5 << 3) + (a_ucG5 >> 2));
-			unsigned char ucB8 = (unsigned char)((a_ucB5 << 3) + (a_ucB5 >> 2));
-
-			frgba.fR = (float)ucR8 / 255.0f;
-			frgba.fG = (float)ucG8 / 255.0f;
-			frgba.fB = (float)ucB8 / 255.0f;
-			frgba.fA = 1.0f;
-
-			return frgba;
-		}
-
-		inline static ColorFloatRGBA ConvertFromR6G7B6(unsigned char a_ucR6,
-			unsigned char a_ucG7,
-			unsigned char a_ucB6)
-		{
-			ColorFloatRGBA frgba;
-
-			unsigned char ucR8 = (unsigned char)((a_ucR6 << 2) + (a_ucR6 >> 4));
-			unsigned char ucG8 = (unsigned char)((a_ucG7 << 1) + (a_ucG7 >> 6));
-			unsigned char ucB8 = (unsigned char)((a_ucB6 << 2) + (a_ucB6 >> 4));
-
-			frgba.fR = (float)ucR8 / 255.0f;
-			frgba.fG = (float)ucG8 / 255.0f;
-			frgba.fB = (float)ucB8 / 255.0f;
-			frgba.fA = 1.0f;
-
-			return frgba;
-		}
-
-		// quantize to 4 bits, expand to 8 bits
-		inline ColorFloatRGBA QuantizeR4G4B4(void) const
-		{
-			ColorFloatRGBA frgba = *this;
-
-			// quantize to 4 bits
-			frgba = frgba.ClampRGB().ScaleRGB(15.0f).RoundRGB();
-			unsigned int uiR4 = (unsigned int)frgba.fR;
-			unsigned int uiG4 = (unsigned int)frgba.fG;
-			unsigned int uiB4 = (unsigned int)frgba.fB;
-
-			// expand to 8 bits
-			frgba.fR = (float) ((uiR4 << 4) + uiR4);
-			frgba.fG = (float) ((uiG4 << 4) + uiG4);
-			frgba.fB = (float) ((uiB4 << 4) + uiB4);
-
-			frgba = frgba.ScaleRGB(1.0f/255.0f);
-
-			return frgba;
-		}
-
-		// quantize to 5 bits, expand to 8 bits
-		inline ColorFloatRGBA QuantizeR5G5B5(void) const
-		{
-			ColorFloatRGBA frgba = *this;
-
-			// quantize to 5 bits
-			frgba = frgba.ClampRGB().ScaleRGB(31.0f).RoundRGB();
-			unsigned int uiR5 = (unsigned int)frgba.fR;
-			unsigned int uiG5 = (unsigned int)frgba.fG;
-			unsigned int uiB5 = (unsigned int)frgba.fB;
-
-			// expand to 8 bits
-			frgba.fR = (float)((uiR5 << 3) + (uiR5 >> 2));
-			frgba.fG = (float)((uiG5 << 3) + (uiG5 >> 2));
-			frgba.fB = (float)((uiB5 << 3) + (uiB5 >> 2));
-
-			frgba = frgba.ScaleRGB(1.0f / 255.0f);
-
-			return frgba;
-		}
-
-		// quantize to 6/7/6 bits, expand to 8 bits
-		inline ColorFloatRGBA QuantizeR6G7B6(void) const
-		{
-			ColorFloatRGBA frgba = *this;
-
-			// quantize to 6/7/6 bits
-			ColorFloatRGBA frgba6 = frgba.ClampRGB().ScaleRGB(63.0f).RoundRGB();
-			ColorFloatRGBA frgba7 = frgba.ClampRGB().ScaleRGB(127.0f).RoundRGB();
-			unsigned int uiR6 = (unsigned int)frgba6.fR;
-			unsigned int uiG7 = (unsigned int)frgba7.fG;
-			unsigned int uiB6 = (unsigned int)frgba6.fB;
-
-			// expand to 8 bits
-			frgba.fR = (float)((uiR6 << 2) + (uiR6 >> 4));
-			frgba.fG = (float)((uiG7 << 1) + (uiG7 >> 6));
-			frgba.fB = (float)((uiB6 << 2) + (uiB6 >> 4));
-
-			frgba = frgba.ScaleRGB(1.0f / 255.0f);
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA ClampRGB(void)
-		{
-			ColorFloatRGBA frgba = *this;
-			if (frgba.fR < 0.0f) { frgba.fR = 0.0f; }
-			if (frgba.fR > 1.0f) { frgba.fR = 1.0f; }
-			if (frgba.fG < 0.0f) { frgba.fG = 0.0f; }
-			if (frgba.fG > 1.0f) { frgba.fG = 1.0f; }
-			if (frgba.fB < 0.0f) { frgba.fB = 0.0f; }
-			if (frgba.fB > 1.0f) { frgba.fB = 1.0f; }
-
-			return frgba;
-		}
-
-		inline ColorFloatRGBA ClampRGBA(void)
-		{
-			ColorFloatRGBA frgba = *this;
-			if (frgba.fR < 0.0f) { frgba.fR = 0.0f; }
-			if (frgba.fR > 1.0f) { frgba.fR = 1.0f; }
-			if (frgba.fG < 0.0f) { frgba.fG = 0.0f; }
-			if (frgba.fG > 1.0f) { frgba.fG = 1.0f; }
-			if (frgba.fB < 0.0f) { frgba.fB = 0.0f; }
-			if (frgba.fB > 1.0f) { frgba.fB = 1.0f; }
-			if (frgba.fA < 0.0f) { frgba.fA = 0.0f; }
-			if (frgba.fA > 1.0f) { frgba.fA = 1.0f; }
-
-			return frgba;
-		}
-
-		inline int IntRed(float a_fScale)
-		{
-			return (int)roundf(fR * a_fScale);
-		}
-
-		inline int IntGreen(float a_fScale)
-		{
-			return (int)roundf(fG * a_fScale);
-		}
-
-		inline int IntBlue(float a_fScale)
-		{
-			return (int)roundf(fB * a_fScale);
-		}
-
-		inline int IntAlpha(float a_fScale)
-		{
-			return (int)roundf(fA * a_fScale);
-		}
-
-		float	fR, fG, fB, fA;
-    };
-
-}
-
diff --git a/thirdparty/etc2comp/EtcConfig.h b/thirdparty/etc2comp/EtcConfig.h
deleted file mode 100644
index 3bfe1d99a8..0000000000
--- a/thirdparty/etc2comp/EtcConfig.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#ifdef _WIN32
-#define ETC_WINDOWS (1)
-#else
-#define ETC_WINDOWS (0)
-#endif
-
-#if __APPLE__
-#define ETC_OSX (1)
-#else
-#define ETC_OSX (0)
-#endif
-
-#if __unix__
-#define ETC_UNIX (1)
-#else
-#define ETC_UNIX (0)
-#endif
-
-
-// short names for common types
-#include <stdint.h>
-typedef int8_t i8;
-typedef int16_t i16;
-typedef int32_t i32;
-typedef int64_t i64;
-
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
-
-typedef float	f32;
-typedef double	f64;
-
-// Keep asserts enabled in release builds during development
-#undef NDEBUG
-
-// 0=disable. stb_image can be used if you need to compress
-//other image formats like jpg
-#define USE_STB_IMAGE_LOAD 0
-
-#if ETC_WINDOWS
-#include <sdkddkver.h>
-#define _CRT_SECURE_NO_WARNINGS (1)
-#include <tchar.h>
-#endif
-
-#include <stdio.h>
-
diff --git a/thirdparty/etc2comp/EtcDifferentialTrys.cpp b/thirdparty/etc2comp/EtcDifferentialTrys.cpp
deleted file mode 100644
index ef4cd103d9..0000000000
--- a/thirdparty/etc2comp/EtcDifferentialTrys.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcDifferentialTrys.cpp
-
-Gathers the results of the various encoding trys for both halves of a 4x4 block for Differential mode
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcDifferentialTrys.h"
-
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// construct a list of trys (encoding attempts)
-	//
-	// a_frgbaColor1 is the basecolor for the first half
-	// a_frgbaColor2 is the basecolor for the second half
-	// a_pauiPixelMapping1 is the pixel order for the first half
-	// a_pauiPixelMapping2 is the pixel order for the second half
-	// a_uiRadius is the amount to vary the base colors
-	//
-	DifferentialTrys::DifferentialTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2,
-										const unsigned int *a_pauiPixelMapping1,
-										const unsigned int *a_pauiPixelMapping2,
-										unsigned int a_uiRadius,
-										int a_iGrayOffset1, int a_iGrayOffset2)
-	{
-		assert(a_uiRadius <= MAX_RADIUS);
-
-		m_boolSeverelyBentColors = false;
-
-		ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR5G5B5();
-		ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR5G5B5();
-
-		// quantize base colors
-		// ensure that trys with a_uiRadius don't overflow
-		int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(31.0f)+a_iGrayOffset1, a_uiRadius);
-		int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(31.0f) + a_iGrayOffset1, a_uiRadius);
-		int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(31.0f) + a_iGrayOffset1, a_uiRadius);
-		int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(31.0f) + a_iGrayOffset2, a_uiRadius);
-		int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(31.0f) + a_iGrayOffset2, a_uiRadius);
-		int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(31.0f) + a_iGrayOffset2, a_uiRadius);
-
-		int iDeltaRed = iRed2 - iRed1;
-		int iDeltaGreen = iGreen2 - iGreen1;
-		int iDeltaBlue = iBlue2 - iBlue1;
-
-		// make sure components are within range
-		{
-			if (iDeltaRed > 3)
-			{
-				if (iDeltaRed > 7)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iRed1 += (iDeltaRed - 3) / 2;
-				iRed2 = iRed1 + 3;
-				iDeltaRed = 3;
-			}
-			else if (iDeltaRed < -4)
-			{
-				if (iDeltaRed < -8)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iRed1 += (iDeltaRed + 4) / 2;
-				iRed2 = iRed1 - 4;
-				iDeltaRed = -4;
-			}
-			assert(iRed1 >= (signed)(0 + a_uiRadius) && iRed1 <= (signed)(31 - a_uiRadius));
-			assert(iRed2 >= (signed)(0 + a_uiRadius) && iRed2 <= (signed)(31 - a_uiRadius));
-			assert(iDeltaRed >= -4 && iDeltaRed <= 3);
-
-			if (iDeltaGreen > 3)
-			{
-				if (iDeltaGreen > 7)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iGreen1 += (iDeltaGreen - 3) / 2;
-				iGreen2 = iGreen1 + 3;
-				iDeltaGreen = 3;
-			}
-			else if (iDeltaGreen < -4)
-			{
-				if (iDeltaGreen < -8)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iGreen1 += (iDeltaGreen + 4) / 2;
-				iGreen2 = iGreen1 - 4;
-				iDeltaGreen = -4;
-			}
-			assert(iGreen1 >= (signed)(0 + a_uiRadius) && iGreen1 <= (signed)(31 - a_uiRadius));
-			assert(iGreen2 >= (signed)(0 + a_uiRadius) && iGreen2 <= (signed)(31 - a_uiRadius));
-			assert(iDeltaGreen >= -4 && iDeltaGreen <= 3);
-
-			if (iDeltaBlue > 3)
-			{
-				if (iDeltaBlue > 7)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iBlue1 += (iDeltaBlue - 3) / 2;
-				iBlue2 = iBlue1 + 3;
-				iDeltaBlue = 3;
-			}
-			else if (iDeltaBlue < -4)
-			{
-				if (iDeltaBlue < -8)
-				{
-					m_boolSeverelyBentColors = true;
-				}
-
-				iBlue1 += (iDeltaBlue + 4) / 2;
-				iBlue2 = iBlue1 - 4;
-				iDeltaBlue = -4;
-			}
-			assert(iBlue1 >= (signed)(0+a_uiRadius) && iBlue1 <= (signed)(31 - a_uiRadius));
-			assert(iBlue2 >= (signed)(0 + a_uiRadius) && iBlue2 <= (signed)(31 - a_uiRadius));
-			assert(iDeltaBlue >= -4 && iDeltaBlue <= 3);
-		}
-
-		m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius);
-		m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	void DifferentialTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue, 
-										const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius)
-	{
-
-		m_iRed = a_iRed;
-		m_iGreen = a_iGreen;
-		m_iBlue = a_iBlue;
-
-		m_pauiPixelMapping = a_pauiPixelMapping;
-		m_uiRadius = a_uiRadius;
-
-		m_uiTrys = 0;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcDifferentialTrys.h b/thirdparty/etc2comp/EtcDifferentialTrys.h
deleted file mode 100644
index 71860908ff..0000000000
--- a/thirdparty/etc2comp/EtcDifferentialTrys.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColorFloatRGBA.h"
-
-namespace Etc
-{
-
-	class DifferentialTrys
-	{
-	public:
-
-		static const unsigned int MAX_RADIUS = 2;
-
-		DifferentialTrys(ColorFloatRGBA a_frgbaColor1,
-							ColorFloatRGBA a_frgbaColor2,
-							const unsigned int *a_pauiPixelMapping1,
-							const unsigned int *a_pauiPixelMapping2,
-							unsigned int a_uiRadius,
-							int a_iGrayOffset1, int a_iGrayOffset2);
-
-		inline static int MoveAwayFromEdge(int a_i, int a_iDistance)
-		{
-			if (a_i < (0+ a_iDistance))
-			{
-				return (0 + a_iDistance);
-			}
-			else if (a_i > (31- a_iDistance))
-			{
-				return (31 - a_iDistance);
-			}
-
-			return a_i;
-		}
-
-		class Try
-		{
-        public :
-			static const unsigned int SELECTORS = 8;	// per half
-
-			int m_iRed;
-			int m_iGreen;
-			int m_iBlue;
-			unsigned int m_uiCW;
-			unsigned int m_auiSelectors[SELECTORS];
-			float m_fError;
-        };
-
-		class Half
-		{
-		public:
-
-			static const unsigned int MAX_TRYS = 125;
-
-			void Init(int a_iRed, int a_iGreen, int a_iBlue, 
-						const unsigned int *a_pauiPixelMapping,
-						unsigned int a_uiRadius);
-
-			// center of trys
-			int m_iRed;
-			int m_iGreen;
-			int m_iBlue;
-
-			const unsigned int *m_pauiPixelMapping;
-			unsigned int m_uiRadius;
-
-			unsigned int m_uiTrys;
-			Try m_atry[MAX_TRYS];
-
-			Try *m_ptryBest;
-		};
-
-		Half m_half1;
-		Half m_half2;
-
-		bool m_boolSeverelyBentColors;
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcErrorMetric.h b/thirdparty/etc2comp/EtcErrorMetric.h
deleted file mode 100644
index df4dcab4fb..0000000000
--- a/thirdparty/etc2comp/EtcErrorMetric.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-namespace Etc
-{
-
-	enum ErrorMetric
-	{
-		RGBA,
-		RGBX,
-		REC709,
-		NUMERIC,
-		NORMALXYZ,
-		//
-		ERROR_METRICS,
-		//
-		BT709 = REC709
-	};
-
-	inline const char *ErrorMetricToString(ErrorMetric errorMetric)
-	{
-		switch (errorMetric)
-		{
-		case RGBA:
-			return "RGBA";
-		case RGBX:
-			return "RGBX";
-		case REC709:
-			return "REC709";
-		case NUMERIC:
-			return "NUMERIC";
-		case NORMALXYZ:
-			return "NORMALXYZ";
-		case ERROR_METRICS:
-		default:
-			return "UNKNOWN";
-		}
-	}
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcFile.cpp b/thirdparty/etc2comp/EtcFile.cpp
deleted file mode 100644
index 831a3aac45..0000000000
--- a/thirdparty/etc2comp/EtcFile.cpp
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef _WIN32
-#define _CRT_SECURE_NO_WARNINGS (1)
-#endif
-
-#include "EtcConfig.h"
-
-
-#include "EtcFile.h"
-
-#include "EtcFileHeader.h"
-#include "EtcColor.h"
-#include "Etc.h"
-#include "EtcBlock4x4EncodingBits.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <stdlib.h>
-
-using namespace Etc;
-
-// ----------------------------------------------------------------------------------------------------
-//
-File::File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
-			unsigned char *a_paucEncodingBits, unsigned int a_uiEncodingBitsBytes,
-			unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
-			unsigned int a_uiExtendedWidth, unsigned int a_uiExtendedHeight)
-{
-	if (a_pstrFilename == nullptr)
-	{
-		m_pstrFilename = const_cast<char *>("");
-	}
-	else
-	{
-		m_pstrFilename = new char[strlen(a_pstrFilename) + 1];
-		strcpy(m_pstrFilename, a_pstrFilename);
-	}
-
-	m_fileformat = a_fileformat;
-	if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION)
-	{
-		// ***** TODO: add this later *****
-		m_fileformat = Format::KTX;
-	}
-
-	m_imageformat = a_imageformat;
-
-	m_uiNumMipmaps = 1;
-	m_pMipmapImages = new RawImage[m_uiNumMipmaps];
-	m_pMipmapImages[0].paucEncodingBits = std::shared_ptr<unsigned char>(a_paucEncodingBits, [](unsigned char *p) { delete[] p; } );
-	m_pMipmapImages[0].uiEncodingBitsBytes = a_uiEncodingBitsBytes;
-	m_pMipmapImages[0].uiExtendedWidth = a_uiExtendedWidth;
-	m_pMipmapImages[0].uiExtendedHeight = a_uiExtendedHeight;
-
-	m_uiSourceWidth = a_uiSourceWidth;
-	m_uiSourceHeight = a_uiSourceHeight;
-
-	switch (m_fileformat)
-	{
-	case Format::PKM:
-		m_pheader = new FileHeader_Pkm(this);
-		break;
-
-	case Format::KTX:
-		m_pheader = new FileHeader_Ktx(this);
-		break;
-
-	default:
-		assert(0);
-		break;
-	}
-
-}
-
-// ----------------------------------------------------------------------------------------------------
-//
-File::File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
-	unsigned int a_uiNumMipmaps, RawImage *a_pMipmapImages,
-	unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight)
-{
-	if (a_pstrFilename == nullptr)
-	{
-		m_pstrFilename = const_cast<char *>("");
-	}
-	else
-	{
-		m_pstrFilename = new char[strlen(a_pstrFilename) + 1];
-		strcpy(m_pstrFilename, a_pstrFilename);
-	}
-
-	m_fileformat = a_fileformat;
-	if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION)
-	{
-		// ***** TODO: add this later *****
-		m_fileformat = Format::KTX;
-	}
-
-	m_imageformat = a_imageformat;
-
-	m_uiNumMipmaps = a_uiNumMipmaps;
-	m_pMipmapImages = new RawImage[m_uiNumMipmaps];
-
-	for(unsigned int mip = 0; mip < m_uiNumMipmaps; mip++)
-	{
-		m_pMipmapImages[mip] = a_pMipmapImages[mip];
-	}
-
-	m_uiSourceWidth = a_uiSourceWidth;
-	m_uiSourceHeight = a_uiSourceHeight;
-
-	switch (m_fileformat)
-	{
-	case Format::PKM:
-		m_pheader = new FileHeader_Pkm(this);
-		break;
-
-	case Format::KTX:
-		m_pheader = new FileHeader_Ktx(this);
-		break;
-
-	default:
-		assert(0);
-		break;
-	}
-
-}
-
-// ----------------------------------------------------------------------------------------------------
-//
-File::File(const char *a_pstrFilename, Format a_fileformat)
-{
-	if (a_pstrFilename == nullptr)
-	{
-		return;
-	}
-	else
-	{
-		m_pstrFilename = new char[strlen(a_pstrFilename) + 1];
-		strcpy(m_pstrFilename, a_pstrFilename);
-	}
-
-	m_fileformat = a_fileformat;
-	if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION)
-	{
-		// ***** TODO: add this later *****
-		m_fileformat = Format::KTX;
-	}
-
-	FILE *pfile = fopen(m_pstrFilename, "rb");
-	if (pfile == nullptr)
-	{
-		printf("ERROR: Couldn't open %s", m_pstrFilename);
-		exit(1);
-	}
-	fseek(pfile, 0, SEEK_END);
-	unsigned int fileSize = ftell(pfile);
-	fseek(pfile, 0, SEEK_SET);
-	size_t szResult;
-
-	m_pheader = new FileHeader_Ktx(this);
-	szResult = fread( ((FileHeader_Ktx*)m_pheader)->GetData(), 1, sizeof(FileHeader_Ktx::Data), pfile);
-	assert(szResult > 0);
-
-	m_uiNumMipmaps = 1;
-	m_pMipmapImages = new RawImage[m_uiNumMipmaps];
-
-	if (((FileHeader_Ktx*)m_pheader)->GetData()->m_u32BytesOfKeyValueData > 0)
-		fseek(pfile, ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32BytesOfKeyValueData, SEEK_CUR);
-	szResult = fread(&m_pMipmapImages->uiEncodingBitsBytes, 1, sizeof(unsigned int), pfile);
-	assert(szResult > 0);
-
-	m_pMipmapImages->paucEncodingBits = std::shared_ptr<unsigned char>(new unsigned char[m_pMipmapImages->uiEncodingBitsBytes], [](unsigned char *p) { delete[] p; } );
-	assert(ftell(pfile) + m_pMipmapImages->uiEncodingBitsBytes <= fileSize);
-	szResult = fread(m_pMipmapImages->paucEncodingBits.get(), 1, m_pMipmapImages->uiEncodingBitsBytes, pfile);
-	assert(szResult == m_pMipmapImages->uiEncodingBitsBytes);
-
-	uint32_t uiInternalFormat = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32GlInternalFormat;
-	uint32_t uiBaseInternalFormat = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32GlBaseInternalFormat;
-	
-	if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC1_RGB8 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC1_RGB8)
-	{
-		m_imageformat = Image::Format::ETC1;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RGB8 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RGB8)
-	{
-		m_imageformat = Image::Format::RGB8;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RGB8A1 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RGB8A1)
-	{
-		m_imageformat = Image::Format::RGB8A1;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RGBA8 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RGBA8)
-	{
-		m_imageformat = Image::Format::RGBA8;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_R11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_R11)
-	{
-		m_imageformat = Image::Format::R11;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_SIGNED_R11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_R11)
-	{
-		m_imageformat = Image::Format::SIGNED_R11;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RG11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RG11)
-	{
-		m_imageformat = Image::Format::RG11;
-	}
-	else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_SIGNED_RG11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RG11)
-	{
-		m_imageformat = Image::Format::SIGNED_RG11;
-	}
-	else
-	{
-		m_imageformat = Image::Format::UNKNOWN;
-	}
-
-	m_uiSourceWidth = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32PixelWidth;
-	m_uiSourceHeight = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32PixelHeight;
-	m_pMipmapImages->uiExtendedWidth = Image::CalcExtendedDimension((unsigned short)m_uiSourceWidth);
-	m_pMipmapImages->uiExtendedHeight = Image::CalcExtendedDimension((unsigned short)m_uiSourceHeight);
-
-	unsigned int uiBlocks = m_pMipmapImages->uiExtendedWidth * m_pMipmapImages->uiExtendedHeight / 16;
-	Block4x4EncodingBits::Format encodingbitsformat = Image::DetermineEncodingBitsFormat(m_imageformat);
-	unsigned int expectedbytes = uiBlocks * Block4x4EncodingBits::GetBytesPerBlock(encodingbitsformat);
-	assert(expectedbytes == m_pMipmapImages->uiEncodingBitsBytes);
-
-	fclose(pfile);
-}
-
-File::~File()
-{
-	if (m_pMipmapImages != nullptr)
-	{
-		delete [] m_pMipmapImages;
-	}
-
-	if(m_pstrFilename != nullptr)
-	{
-		delete[] m_pstrFilename;
-		m_pstrFilename = nullptr;
-	}
-	if (m_pheader != nullptr)
-	{
-		delete m_pheader;
-		m_pheader = nullptr;
-	}
-}
-
-void File::UseSingleBlock(int a_iPixelX, int a_iPixelY)
-{
-	if (a_iPixelX <= -1 || a_iPixelY <= -1)
-		return;
-	if (a_iPixelX >(int) m_uiSourceWidth)
-	{
-		//if we are using a ktx thats the size of a single block or less
-		//then make sure we use the 4x4 image as the single block
-		if (m_uiSourceWidth <= 4)
-		{
-			a_iPixelX = 0;
-		}
-		else
-		{
-			printf("blockAtHV: H coordinate out of range, capped to image width\n");
-			a_iPixelX = m_uiSourceWidth - 1;
-		}
-	}
-	if (a_iPixelY >(int) m_uiSourceHeight)
-	{
-		//if we are using a ktx thats the size of a single block or less
-		//then make sure we use the 4x4 image as the single block
-		if (m_uiSourceHeight <= 4)
-		{
-			a_iPixelY= 0;
-		}
-		else
-		{
-			printf("blockAtHV: V coordinate out of range, capped to image height\n");
-			a_iPixelY = m_uiSourceHeight - 1;
-		}
-	}
-
-	unsigned int origWidth = m_uiSourceWidth;
-	unsigned int origHeight = m_uiSourceHeight;
-
-	m_uiSourceWidth = 4;
-	m_uiSourceHeight = 4;
-
-	Block4x4EncodingBits::Format encodingbitsformat = Image::DetermineEncodingBitsFormat(m_imageformat);
-	unsigned int uiEncodingBitsBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(encodingbitsformat);
-
-	int numMipmaps = 1;
-	RawImage* pMipmapImages = new RawImage[numMipmaps];
-	pMipmapImages[0].uiExtendedWidth = Image::CalcExtendedDimension((unsigned short)m_uiSourceWidth);
-	pMipmapImages[0].uiExtendedHeight = Image::CalcExtendedDimension((unsigned short)m_uiSourceHeight);
-	pMipmapImages[0].uiEncodingBitsBytes = 0;
-	pMipmapImages[0].paucEncodingBits = std::shared_ptr<unsigned char>(new unsigned char[uiEncodingBitsBytesPerBlock], [](unsigned char *p) { delete[] p; });
-
-	//block position in pixels
-	// remove the bottom 2 bits to get the block coordinates 
-	unsigned int iBlockPosX = (a_iPixelX & 0xFFFFFFFC);
-	unsigned int iBlockPosY = (a_iPixelY & 0xFFFFFFFC);
-
-	int numXBlocks = (origWidth / 4);
-	int numYBlocks = (origHeight / 4);
-	
-
-	// block location 
-	//int iBlockX = (a_iPixelX % 4) == 0 ? a_iPixelX / 4.0f : (a_iPixelX / 4) + 1;
-	//int iBlockY = (a_iPixelY % 4) == 0 ? a_iPixelY / 4.0f : (a_iPixelY / 4) + 1;
-	//m_paucEncodingBits += ((iBlockY * numXBlocks) + iBlockX) * uiEncodingBitsBytesPerBlock;
-
-	
-	unsigned int num = numXBlocks*numYBlocks;
-	unsigned int uiH = 0, uiV = 0;
-	unsigned char* pEncodingBits = m_pMipmapImages[0].paucEncodingBits.get();
-	for (unsigned int uiBlock = 0; uiBlock < num; uiBlock++)
-	{
-		if (uiH == iBlockPosX && uiV == iBlockPosY)
-		{
-			memcpy(pMipmapImages[0].paucEncodingBits.get(),pEncodingBits, uiEncodingBitsBytesPerBlock);
-			break;
-		}
-		pEncodingBits += uiEncodingBitsBytesPerBlock;
-		uiH += 4;
-
-		if (uiH >= origWidth)
-		{
-			uiH = 0;
-			uiV += 4;
-		}
-	}
-
-	delete [] m_pMipmapImages;
-	m_pMipmapImages = pMipmapImages;
-}
-// ----------------------------------------------------------------------------------------------------
-//
-void File::Write()
-{
-
-	FILE *pfile = fopen(m_pstrFilename, "wb");
-	if (pfile == nullptr)
-	{
-		printf("Error: couldn't open Etc file (%s)\n", m_pstrFilename);
-		exit(1);
-	}
-
-	m_pheader->Write(pfile);
-
-	for(unsigned int mip = 0; mip < m_uiNumMipmaps; mip++)
-	{
-		if(m_fileformat == Format::KTX)
-		{
-			// Write u32 image size
-			uint32_t u32ImageSize = m_pMipmapImages[mip].uiEncodingBitsBytes;
-			uint32_t szBytesWritten = fwrite(&u32ImageSize, 1, sizeof(u32ImageSize), pfile);
-			assert(szBytesWritten == sizeof(u32ImageSize));
-		}
-
-		unsigned int iResult = (int)fwrite(m_pMipmapImages[mip].paucEncodingBits.get(), 1, m_pMipmapImages[mip].uiEncodingBitsBytes, pfile);
-		if (iResult != m_pMipmapImages[mip].uiEncodingBitsBytes)
-	{
-		printf("Error: couldn't write Etc file (%s)\n", m_pstrFilename);
-		exit(1);
-		}
-	}
-
-	fclose(pfile);
-
-}
-
-// ----------------------------------------------------------------------------------------------------
-//
-
diff --git a/thirdparty/etc2comp/EtcFile.h b/thirdparty/etc2comp/EtcFile.h
deleted file mode 100644
index 69bf3b2d3a..0000000000
--- a/thirdparty/etc2comp/EtcFile.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColorFloatRGBA.h"
-#include "EtcImage.h"
-#include "Etc.h"
-
-namespace Etc
-{
-	class FileHeader;
-	class SourceImage;
-
-	class File
-	{
-	public:
-
-		enum class Format
-		{
-			INFER_FROM_FILE_EXTENSION,
-			PKM,
-			KTX,
-		};
-
-		File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
-				unsigned char *a_paucEncodingBits, unsigned int a_uiEncodingBitsBytes,
-				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
-				unsigned int a_uiExtendedWidth, unsigned int a_uiExtendedHeight);
-
-		File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
-			unsigned int a_uiNumMipmaps, RawImage *pMipmapImages,
-			unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight );
-
-		File(const char *a_pstrFilename, Format a_fileformat);
-		~File();
-		const char *GetFilename(void) { return m_pstrFilename; }
-
-		void Read(const char *a_pstrFilename);
-		void Write(void);
-
-		inline unsigned int GetSourceWidth(void)
-		{
-			return m_uiSourceWidth;
-		}
-
-		inline unsigned int GetSourceHeight(void)
-		{
-			return m_uiSourceHeight;
-		}
-
-		inline unsigned int GetExtendedWidth(unsigned int mipmapIndex = 0)
-		{
-			if (mipmapIndex < m_uiNumMipmaps)
-			{
-				return m_pMipmapImages[mipmapIndex].uiExtendedWidth;
-			}
-			else
-			{
-				return 0;
-			}
-		}
-
-		inline unsigned int GetExtendedHeight(unsigned int mipmapIndex = 0)
-		{
-			if (mipmapIndex < m_uiNumMipmaps)
-			{
-				return m_pMipmapImages[mipmapIndex].uiExtendedHeight;
-			}
-			else
-			{
-				return 0;
-			}
-		}
-
-		inline Image::Format GetImageFormat()
-		{
-			return m_imageformat;
-		}
-
-		inline unsigned int GetEncodingBitsBytes(unsigned int mipmapIndex = 0)
-		{
-			if (mipmapIndex < m_uiNumMipmaps)
-			{
-				return m_pMipmapImages[mipmapIndex].uiEncodingBitsBytes;
-			}
-			else
-			{
-				return 0;
-			}
-		}
-
-		inline unsigned char*  GetEncodingBits(unsigned int mipmapIndex = 0)
-		{
-			if( mipmapIndex < m_uiNumMipmaps)
-			{
-				return m_pMipmapImages[mipmapIndex].paucEncodingBits.get();
-			}
-			else
-			{
-				return nullptr;
-			}
-		}
-
-		inline unsigned int GetNumMipmaps() 
-		{
-			return m_uiNumMipmaps; 
-		}
-
-		void UseSingleBlock(int a_iPixelX = -1, int a_iPixelY = -1);
-	private:
-
-		char *m_pstrFilename;               // includes directory path and file extension
-		Format m_fileformat;
-		Image::Format m_imageformat;
-		FileHeader *m_pheader;
-		unsigned int m_uiNumMipmaps;
-		RawImage*	 m_pMipmapImages;
-		unsigned int m_uiSourceWidth;
-		unsigned int m_uiSourceHeight;
-	};
-
-}
diff --git a/thirdparty/etc2comp/EtcFileHeader.cpp b/thirdparty/etc2comp/EtcFileHeader.cpp
deleted file mode 100644
index f02fcab011..0000000000
--- a/thirdparty/etc2comp/EtcFileHeader.cpp
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "EtcFileHeader.h"
-
-#include "EtcBlock4x4EncodingBits.h"
-
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	FileHeader_Pkm::FileHeader_Pkm(File *a_pfile)
-	{
-		m_pfile = a_pfile;
-
-		static const char s_acMagicNumberData[4] = { 'P', 'K', 'M', ' ' };
-		static const char s_acVersionData[2] = { '1', '0' };
-
-		for (unsigned int ui = 0; ui < sizeof(s_acMagicNumberData); ui++)
-		{
-			m_data.m_acMagicNumber[ui] = s_acMagicNumberData[ui];
-		}
-
-		for (unsigned int ui = 0; ui < sizeof(s_acVersionData); ui++)
-		{
-			m_data.m_acVersion[ui] = s_acVersionData[ui];
-		}
-
-		m_data.m_ucDataType_msb = 0;        // ETC1_RGB_NO_MIPMAPS
-		m_data.m_ucDataType_lsb = 0;
-
-		m_data.m_ucOriginalWidth_msb = (unsigned char)(m_pfile->GetSourceWidth() >> 8);
-		m_data.m_ucOriginalWidth_lsb = m_pfile->GetSourceWidth() & 0xFF;
-		m_data.m_ucOriginalHeight_msb = (unsigned char)(m_pfile->GetSourceHeight() >> 8);
-		m_data.m_ucOriginalHeight_lsb = m_pfile->GetSourceHeight() & 0xFF;
-
-		m_data.m_ucExtendedWidth_msb = (unsigned char)(m_pfile->GetExtendedWidth() >> 8);
-		m_data.m_ucExtendedWidth_lsb = m_pfile->GetExtendedWidth() & 0xFF;
-		m_data.m_ucExtendedHeight_msb = (unsigned char)(m_pfile->GetExtendedHeight() >> 8);
-		m_data.m_ucExtendedHeight_lsb = m_pfile->GetExtendedHeight() & 0xFF;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	void FileHeader_Pkm::Write(FILE *a_pfile)
-	{
-
-		fwrite(&m_data, sizeof(Data), 1, a_pfile);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	FileHeader_Ktx::FileHeader_Ktx(File *a_pfile)
-	{
-		m_pfile = a_pfile;
-
-		static const uint8_t s_au8Itentfier[12] =
-		{ 
-			0xAB, 0x4B, 0x54, 0x58, // first four bytes of Byte[12] identifier
-			0x20, 0x31, 0x31, 0xBB, // next four bytes of Byte[12] identifier
-			0x0D, 0x0A, 0x1A, 0x0A  // final four bytes of Byte[12] identifier
-		};
-
-		for (unsigned int ui = 0; ui < sizeof(s_au8Itentfier); ui++)
-		{
-			m_data.m_au8Identifier[ui] = s_au8Itentfier[ui];
-		}
-
-		m_data.m_u32Endianness				= 0x04030201;
-		m_data.m_u32GlType					= 0;
-		m_data.m_u32GlTypeSize				= 1;
-		m_data.m_u32GlFormat				= 0;
-
-		switch (m_pfile->GetImageFormat())
-		{
-		case Image::Format::RGB8:
-		case Image::Format::SRGB8:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RGB8;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RGB8;
-			break;
-
-		case Image::Format::RGBA8:
-		case Image::Format::SRGBA8:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RGBA8;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RGBA8;
-			break;
-
-		case Image::Format::RGB8A1:
-		case Image::Format::SRGB8A1:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RGB8A1;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RGB8A1;
-			break;
-		
-		case Image::Format::R11:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_R11;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_R11;
-			break;
-
-		case Image::Format::SIGNED_R11:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_SIGNED_R11;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_R11;
-			break;
-		
-		case Image::Format::RG11:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RG11;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RG11;
-			break;
-
-		case Image::Format::SIGNED_RG11:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_SIGNED_RG11;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RG11;
-			break;
-
-		default:
-			m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC1_RGB8;
-			m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC1_RGB8;
-			break;
-		}
-
-		m_data.m_u32PixelWidth				= 0;
-		m_data.m_u32PixelHeight				= 0;
-		m_data.m_u32PixelDepth				= 0;
-		m_data.m_u32NumberOfArrayElements	= 0;
-		m_data.m_u32NumberOfFaces			= 0;
-		m_data.m_u32BytesOfKeyValueData		= 0;
-
-		m_pkeyvaluepair = nullptr;
-
-		m_u32Images = 0;
-		m_u32KeyValuePairs = 0;
-
-		m_data.m_u32PixelWidth = m_pfile->GetSourceWidth();
-		m_data.m_u32PixelHeight = m_pfile->GetSourceHeight();
-		m_data.m_u32PixelDepth = 0;
-		m_data.m_u32NumberOfArrayElements = 0;
-		m_data.m_u32NumberOfFaces = 1;
-		m_data.m_u32NumberOfMipmapLevels = m_pfile->GetNumMipmaps();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	void FileHeader_Ktx::Write(FILE *a_pfile)
-	{
-		size_t szBytesWritten;
-
-		// Write header
-		szBytesWritten = fwrite(&m_data, 1, sizeof(Data), a_pfile);
-		assert(szBytesWritten == sizeof(Data));
-
-		// Write KeyAndValuePairs
-		if (m_u32KeyValuePairs)
-		{
-			fwrite(m_pkeyvaluepair, m_pkeyvaluepair->u32KeyAndValueByteSize, 1, a_pfile);
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	FileHeader_Ktx::Data *FileHeader_Ktx::GetData()
-	{
-		return &m_data;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcFileHeader.h b/thirdparty/etc2comp/EtcFileHeader.h
deleted file mode 100644
index 55a9cb5d9d..0000000000
--- a/thirdparty/etc2comp/EtcFileHeader.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcFile.h"
-#include <stdio.h>
-#include <inttypes.h>
-
-namespace Etc
-{
-
-	class Image;
-
-	class FileHeader
-	{
-	public:
-
-		virtual void Write(FILE *a_pfile) = 0;
-		File GetFile();
-		virtual ~FileHeader(void) {}
-	protected:
-
-		File *m_pfile;
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-    class FileHeader_Pkm : public FileHeader
-    {
-    public:
-
-		FileHeader_Pkm(File *a_pfile);
-
-		virtual void Write(FILE *a_pfile);
-		virtual ~FileHeader_Pkm(void) {}
-	private:
-
-		typedef struct
-		{
-			char m_acMagicNumber[4];
-			char m_acVersion[2];
-			unsigned char m_ucDataType_msb;             // e.g. ETC1_RGB_NO_MIPMAPS
-			unsigned char m_ucDataType_lsb;
-			unsigned char m_ucExtendedWidth_msb;     //  padded to 4x4 blocks
-			unsigned char m_ucExtendedWidth_lsb;
-			unsigned char m_ucExtendedHeight_msb;    //  padded to 4x4 blocks
-			unsigned char m_ucExtendedHeight_lsb;
-			unsigned char m_ucOriginalWidth_msb;
-			unsigned char m_ucOriginalWidth_lsb;
-			unsigned char m_ucOriginalHeight_msb;
-			unsigned char m_ucOriginalHeight_lsb;
-		} Data;
-
-		Data m_data;
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-    class FileHeader_Ktx : public FileHeader
-    {
-    public:
-
-		typedef struct
-		{
-			uint32_t	u32KeyAndValueByteSize;
-		} KeyValuePair;
-
-		typedef struct
-		{
-			uint8_t m_au8Identifier[12];
-			uint32_t m_u32Endianness;
-			uint32_t m_u32GlType;
-			uint32_t m_u32GlTypeSize;
-			uint32_t m_u32GlFormat;
-			uint32_t m_u32GlInternalFormat;
-			uint32_t m_u32GlBaseInternalFormat;
-			uint32_t m_u32PixelWidth;
-			uint32_t m_u32PixelHeight;
-			uint32_t m_u32PixelDepth;
-			uint32_t m_u32NumberOfArrayElements;
-			uint32_t m_u32NumberOfFaces;
-			uint32_t m_u32NumberOfMipmapLevels;
-			uint32_t m_u32BytesOfKeyValueData;
-		} Data;
-
-		enum class InternalFormat
-		{
-			ETC1_RGB8 = 0x8D64,
-			ETC1_ALPHA8 = ETC1_RGB8,
-			//
-			ETC2_R11 = 0x9270,
-			ETC2_SIGNED_R11 = 0x9271,
-			ETC2_RG11 = 0x9272,
-			ETC2_SIGNED_RG11 = 0x9273,
-			ETC2_RGB8 = 0x9274,
-			ETC2_SRGB8 = 0x9275,
-			ETC2_RGB8A1 = 0x9276,
-			ETC2_SRGB8_PUNCHTHROUGH_ALPHA1 = 0x9277,
-			ETC2_RGBA8 = 0x9278
-		};
-
-		enum class BaseInternalFormat
-		{
-			ETC2_R11 = 0x1903,
-			ETC2_RG11 = 0x8227,
-			ETC1_RGB8 = 0x1907,
-			ETC1_ALPHA8 = ETC1_RGB8,
-			//
-			ETC2_RGB8 = 0x1907,
-			ETC2_RGB8A1 = 0x1908,
-			ETC2_RGBA8 = 0x1908,
-		};
-
-		FileHeader_Ktx(File *a_pfile);
-
-		virtual void Write(FILE *a_pfile);
-		virtual ~FileHeader_Ktx(void) {}
-
-		void AddKeyAndValue(KeyValuePair *a_pkeyvaluepair);
-
-		Data* GetData();
-
-	private:
-
-		Data m_data;
-		KeyValuePair *m_pkeyvaluepair;
-		
-		uint32_t m_u32Images;
-		uint32_t m_u32KeyValuePairs;
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcFilter.cpp b/thirdparty/etc2comp/EtcFilter.cpp
deleted file mode 100644
index 1ec8acdf3f..0000000000
--- a/thirdparty/etc2comp/EtcFilter.cpp
+++ /dev/null
@@ -1,404 +0,0 @@
-#include <stdlib.h>
-#include <math.h>
-#include "EtcFilter.h"
-
-
-namespace Etc
-{
-
-static const double PiConst = 3.14159265358979323846;
-
-inline double sinc(double x) 
-{
-    if ( x == 0.0 ) 
-    {
-        return 1.0;
-    }
-
-    return sin(PiConst * x) / (PiConst * x);
-}
-
-//inline float sincf( float x )
-//{
-//    x *= F_PI;
-//    if (x < 0.01f && x > -0.01f)
-//    {
-//        return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f);
-//    }
-//
-//    return sinf(x)/x;
-//}
-//
-//double bessel0(double x) 
-//{
-//    const double EPSILON_RATIO = 1E-16;
-//    double xh, sum, pow, ds;
-//    int k;
-//
-//    xh = 0.5 * x;
-//    sum = 1.0;
-//    pow = 1.0;
-//    k = 0;
-//    ds = 1.0;
-//    while (ds > sum * EPSILON_RATIO) 
-//    {
-//        ++k;
-//        pow = pow * (xh / k);
-//        ds = pow * pow;
-//        sum = sum + ds;
-//    }
-//
-//    return sum;
-//}
-
-//**--------------------------------------------------------------------------
-//** Name: kaiser(double alpha, double half_width, double x) 
-//** Returns:
-//** Description: Alpha controls shape of filter.  We are using 4.
-//**--------------------------------------------------------------------------
-//inline double kaiser(double alpha, double half_width, double x) 
-//{
-//    double ratio = (x / half_width);
-//    return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha);
-//}
-//
-//float Filter_Lanczos4Sinc(float x)
-//{
-//    if (x <= -4.0f || x >= 4.0f)    // half-width of 4
-//    {
-//        return 0.0;
-//    }
-//
-//    return sinc(0.875f * x) * sinc(0.25f * x);
-//}
-//
-//double Filter_Kaiser4( double t )
-//{
-//    return kaiser( 4.0, 3.0, t);
-//}
-//
-//double Filter_KaiserOptimal( double t )
-//{
-//    return kaiser( 8.93, 3.0f, t);
-//}                  
-
-double FilterLanczos3( double t )
-{
-	if ( t <= -3.0 || t >= 3.0 ) 
-    {
-        return 0.0;
-    }
-
-    return sinc( t ) * sinc( t / 3.0 );
-}
-
-double FilterBox( double t )
-{
-    return ( t > -0.5 && t < 0.5) ? 1.0 : 0.0;
-}
-
-double FilterLinear( double t )
-{
-	if (t < 0.0) t = -t;
-
-    return (t < 1.0) ? (1.0 - t) : 0.0;
-}
-
-
-//**--------------------------------------------------------------------------
-//** Name: CalcContributions( int srcSize, 
-//**                          int destSize, 
-//**                          double filterSize, 
-//**						  bool wrap,
-//**                          double (*FilterProc)(double), 
-//**                          FilterWeights contrib[] )
-//** Returns: void
-//** Description:
-//**--------------------------------------------------------------------------
-void CalcContributions( int srcSize, int destSize, double filterSize, bool wrap, double (*FilterProc)(double), FilterWeights contrib[] )
-{
-    double scale;
-    double filterScale;
-    double center;
-    double totalWeight;
-    double weight;
-    int   iRight;
-    int   iLeft;
-    int   iDest;
-
-    scale = (double)destSize / srcSize;
-    if ( scale < 1.0 )
-    {
-        filterSize = filterSize / scale;
-        filterScale = scale;
-    }
-    else
-    {
-        filterScale = 1.0;
-    }
-
-    if ( filterSize > (double)MaxFilterSize )
-    {
-        filterSize = (double)MaxFilterSize;
-    }
-
-    for ( iDest = 0; iDest < destSize; ++iDest )
-    {
-        center = (double)iDest / scale;
-
-        iLeft = (int)ceil(center - filterSize);
-		iRight = (int)floor(center + filterSize);
-
-		if ( !wrap )
-		{
-        if ( iLeft < 0 )
-        {
-            iLeft = 0;
-        }
-
-        if ( iRight >= srcSize )
-        {
-            iRight = srcSize - 1;
-        }
-		}
-
-		int numWeights = iRight - iLeft + 1;
-
-        contrib[iDest].first = iLeft;
-        contrib[iDest].numWeights = numWeights;
-
-        totalWeight = 0;
-		double t = ((double)iLeft - center) * filterScale;
-		for (int i = 0; i < numWeights; i++)
-        {
-			weight = (*FilterProc)(t) * filterScale;
-            totalWeight += weight;
-			contrib[iDest].weight[i] = weight;
-			t += filterScale;
-        }
-
-        //**--------------------------------------------------------
-        //** Normalize weights by dividing by the sum of the weights
-        //**--------------------------------------------------------
-        if ( totalWeight > 0.0 )
-        {   
-            for ( int i = 0; i < numWeights; i++)
-            {
-                contrib[iDest].weight[i] /= totalWeight;
-            }
-        }
-    }
-}
-
-//**-------------------------------------------------------------------------
-//** Name: Filter_TwoPass( RGBCOLOR *pSrcImage, 
-//**                       int srcWidth, int srcHeight, 
-//**                       RGBCOLOR *pDestImage, 
-//**                       int destWidth, int destHeight, 
-//**                       double (*FilterProc)(double) )
-//** Returns: 0 on failure and 1 on success
-//** Description: Filters a 2d image with a two pass filter by averaging the
-//**    weighted contributions of the pixels within the filter region.  The
-//**    contributions are determined by a weighting function parameter.
-//**-------------------------------------------------------------------------
-int FilterTwoPass( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-                    RGBCOLOR *pDestImage, int destWidth, int destHeight, unsigned int wrapFlags, double (*FilterProc)(double) )
-{
-    FilterWeights *contrib;
-    RGBCOLOR *pPixel;
-    RGBCOLOR *pSrcPixel;
-    RGBCOLOR *pTempImage;
-    int iRow;
-    int iCol;
-    int iSrcCol;
-    int iSrcRow;
-    int iWeight;
-    double dRed;
-    double dGreen;
-    double dBlue;
-    double dAlpha;
-    double filterSize = 3.0;
-
-	int maxDim = (srcWidth>srcHeight)?srcWidth:srcHeight;
-	contrib = (FilterWeights*)malloc(maxDim * sizeof(FilterWeights));
-
-	//**------------------------------------------------------------------------
-    //** Need to create a temporary image to stuff the horizontally scaled image
-    //**------------------------------------------------------------------------
-    pTempImage = (RGBCOLOR *)malloc( destWidth * srcHeight * sizeof(RGBCOLOR) );
-    if ( pTempImage == NULL )
-    {
-        // -- GODOT start --
-        free( contrib );
-        // -- GODOT end --
-        return 0;
-    }
-
-    //**-------------------------------------------------------
-    //** Horizontally filter the image into the temporary image
-    //**-------------------------------------------------------
-	bool bWrapHorizontal = !!(wrapFlags&FILTER_WRAP_X);
-	CalcContributions( srcWidth, destWidth, filterSize, bWrapHorizontal, FilterProc, contrib );
-    for ( iRow = 0; iRow < srcHeight; iRow++ )
-    {
-        for ( iCol = 0; iCol < destWidth; iCol++ )
-        {
-            dRed   = 0;
-            dGreen = 0;
-            dBlue  = 0;
-            dAlpha = 0;
-
-            for ( iWeight = 0; iWeight < contrib[iCol].numWeights; iWeight++ )
-            {
-                iSrcCol = iWeight + contrib[iCol].first;
-				if (bWrapHorizontal)
-				{
-					iSrcCol = (iSrcCol < 0) ? (srcWidth + iSrcCol) : (iSrcCol >= srcWidth) ? (iSrcCol - srcWidth) : iSrcCol;
-				}
-                pSrcPixel = pSrcImage + (iRow * srcWidth) + iSrcCol;
-                dRed   += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[0];
-                dGreen += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[1];
-                dBlue  += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[2];
-                dAlpha += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[3];
-            }
-
-            pPixel = pTempImage + (iRow * destWidth) + iCol;
-			pPixel->rgba[0] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dRed)));
-			pPixel->rgba[1] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dGreen)));
-			pPixel->rgba[2] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dBlue)));
-			pPixel->rgba[3] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dAlpha)));
-        }
-    }
-
-    //**-------------------------------------------------------
-    //** Vertically filter the image into the destination image
-    //**-------------------------------------------------------
-	bool bWrapVertical = !!(wrapFlags&FILTER_WRAP_Y);
-	CalcContributions(srcHeight, destHeight, filterSize, bWrapVertical, FilterProc, contrib);
-    for ( iCol = 0; iCol < destWidth; iCol++ )
-    {
-        for ( iRow = 0; iRow < destHeight; iRow++ )
-        {
-            dRed   = 0;
-            dGreen = 0;
-            dBlue  = 0;
-            dAlpha = 0;
-
-            for ( iWeight = 0; iWeight < contrib[iRow].numWeights; iWeight++ )
-            {
-                iSrcRow = iWeight + contrib[iRow].first;
-				if (bWrapVertical)
-				{
-					iSrcRow = (iSrcRow < 0) ? (srcHeight + iSrcRow) : (iSrcRow >= srcHeight) ? (iSrcRow - srcHeight) : iSrcRow;
-				}
-                pSrcPixel = pTempImage + (iSrcRow * destWidth) + iCol;
-                dRed   += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[0];
-                dGreen += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[1];
-                dBlue  += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[2];
-                dAlpha += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[3];
-            }
-
-            pPixel = pDestImage + (iRow * destWidth) + iCol;
-            pPixel->rgba[0]   = (unsigned char)(std::max( 0.0, std::min( 255.0, dRed)));
-            pPixel->rgba[1] = (unsigned char)(std::max( 0.0, std::min( 255.0, dGreen)));
-            pPixel->rgba[2]  = (unsigned char)(std::max( 0.0, std::min( 255.0, dBlue)));
-            pPixel->rgba[3] = (unsigned char)(std::max( 0.0, std::min( 255.0, dAlpha)));
-        }
-    }
-
-    free( pTempImage );
-	free( contrib );
-
-    return 1;
-}
-
-//**-------------------------------------------------------------------------
-//** Name: FilterResample(RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-//**                       RGBCOLOR *pDstImage, int dstWidth, int dstHeight)
-//** Returns: 1
-//** Description: This function runs a 2d box filter over the srouce image
-//** to produce the destination image.
-//**-------------------------------------------------------------------------
-void FilterResample( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-                     RGBCOLOR *pDstImage, int dstWidth, int dstHeight )
-{
-    int iRow;
-    int iCol;
-    int iSampleRow;
-    int iSampleCol;
-    int iFirstSampleRow;
-    int iFirstSampleCol;
-    int iLastSampleRow;
-    int iLastSampleCol;
-    int red;
-    int green;
-    int blue;
-    int alpha;
-    int samples;
-    float xScale;
-    float yScale;
-
-    RGBCOLOR *pSrcPixel;
-    RGBCOLOR *pDstPixel;
-
-    xScale = (float)srcWidth / dstWidth;
-    yScale = (float)srcHeight / dstHeight;
-
-    for ( iRow = 0; iRow < dstHeight; iRow++ )
-    {
-        for ( iCol = 0; iCol < dstWidth; iCol++ )
-        {
-            iFirstSampleRow = (int)(iRow * yScale);
-            iLastSampleRow = (int)ceil(iFirstSampleRow + yScale - 1);
-            if ( iLastSampleRow >= srcHeight )
-            {
-                iLastSampleRow = srcHeight - 1;
-            }
-
-            iFirstSampleCol = (int)(iCol * xScale);
-            iLastSampleCol = (int)ceil(iFirstSampleCol + xScale - 1);
-            if ( iLastSampleCol >= srcWidth )
-            {
-                iLastSampleCol = srcWidth - 1;
-            }
-
-            samples = 0;
-            red     = 0;
-            green   = 0;
-            blue    = 0;
-            alpha   = 0;
-            for ( iSampleRow = iFirstSampleRow; iSampleRow <= iLastSampleRow; iSampleRow++ )
-            {
-                for ( iSampleCol = iFirstSampleCol; iSampleCol <= iLastSampleCol; iSampleCol++ )
-                {
-                    pSrcPixel = pSrcImage + iSampleRow * srcWidth + iSampleCol;
-                    red   += pSrcPixel->rgba[0];
-                    green += pSrcPixel->rgba[1];
-                    blue  += pSrcPixel->rgba[2];
-                    alpha += pSrcPixel->rgba[3];
-
-                    samples++;
-                }
-            }
-
-            pDstPixel = pDstImage + iRow * dstWidth + iCol;
-            if ( samples > 0 )
-            {
-                pDstPixel->rgba[0] = static_cast<uint8_t>(red / samples);
-                pDstPixel->rgba[1] = static_cast<uint8_t>(green / samples);
-                pDstPixel->rgba[2] = static_cast<uint8_t>(blue / samples);
-                pDstPixel->rgba[3] = static_cast<uint8_t>(alpha / samples);
-            }
-            else
-            {
-                pDstPixel->rgba[0] = static_cast<uint8_t>(red);
-                pDstPixel->rgba[1] = static_cast<uint8_t>(green);
-                pDstPixel->rgba[2] = static_cast<uint8_t>(blue);
-                pDstPixel->rgba[3] = static_cast<uint8_t>(alpha);
-            }
-        }
-    }
-}
-
-
-}
-\ No newline at end of file
diff --git a/thirdparty/etc2comp/EtcFilter.h b/thirdparty/etc2comp/EtcFilter.h
deleted file mode 100644
index fcf125c6df..0000000000
--- a/thirdparty/etc2comp/EtcFilter.h
+++ /dev/null
@@ -1,244 +0,0 @@
-#pragma once
-#include <stdint.h>
-#include <algorithm>
-
-namespace Etc
-{
-
-enum FilterEnums
-{
-	MaxFilterSize = 32
-};
-
-enum WrapFlags
-{
-	FILTER_WRAP_NONE = 0,
-	FILTER_WRAP_X = 0x1,
-	FILTER_WRAP_Y = 0x2
-};
-
-typedef struct tagFilterWeights
-{
-	int   first;
-	int   numWeights;
-	double weight[MaxFilterSize * 2 + 1];
-} FilterWeights;
-
-typedef struct tagRGBCOLOR
-{
-	union
-	{
-		uint32_t ulColor;
-		uint8_t rgba[4];
-	};
-} RGBCOLOR;
-
-
-double FilterBox( double t );
-double FilterLinear( double t );
-double FilterLanczos3( double t );
-
-int FilterTwoPass( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-                    RGBCOLOR *pDestImage, int destWidth, int destHeight, unsigned int wrapFlags, double (*FilterProc)(double) );
-void FilterResample( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, 
-                     RGBCOLOR *pDstImage, int dstWidth, int dstHeight );
-
-
-void CalcContributions(int srcSize, int destSize, double filterSize, bool wrap, double(*FilterProc)(double), FilterWeights contrib[]);
-
-template <typename T>
-void FilterResample(T *pSrcImage, int srcWidth, int srcHeight, T *pDstImage, int dstWidth, int dstHeight)
-{
-	float xScale;
-	float yScale;
-
-	T *pSrcPixel;
-	T *pDstPixel;
-
-	xScale = (float)srcWidth / dstWidth;
-	yScale = (float)srcHeight / dstHeight;
-
-	for (int iRow = 0; iRow < dstHeight; iRow++)
-	{
-		for (int iCol = 0; iCol < dstWidth; iCol++)
-		{
-			int samples;
-			int iFirstSampleRow;
-			int iFirstSampleCol;
-			int iLastSampleRow;
-			int iLastSampleCol;
-			float red;
-			float green;
-			float blue;
-			float alpha;
-
-			iFirstSampleRow = (int)(iRow * yScale);
-			iLastSampleRow = (int)ceil(iFirstSampleRow + yScale - 1);
-			if (iLastSampleRow >= srcHeight)
-			{
-				iLastSampleRow = srcHeight - 1;
-			}
-
-			iFirstSampleCol = (int)(iCol * xScale);
-			iLastSampleCol = (int)ceil(iFirstSampleCol + xScale - 1);
-			if (iLastSampleCol >= srcWidth)
-			{
-				iLastSampleCol = srcWidth - 1;
-			}
-
-			samples = 0;
-			red = 0.f;
-			green = 0.f;
-			blue = 0.f;
-			alpha = 0.f;
-			for (int iSampleRow = iFirstSampleRow; iSampleRow <= iLastSampleRow; iSampleRow++)
-			{
-				for (int iSampleCol = iFirstSampleCol; iSampleCol <= iLastSampleCol; iSampleCol++)
-				{
-					pSrcPixel = pSrcImage + (iSampleRow * srcWidth + iSampleCol) * 4;
-					red += static_cast<float>(pSrcPixel[0]);
-					green += static_cast<float>(pSrcPixel[1]);
-					blue += static_cast<float>(pSrcPixel[2]);
-					alpha += static_cast<float>(pSrcPixel[3]);
-
-					samples++;
-				}
-			}
-
-			pDstPixel = pDstImage + (iRow * dstWidth + iCol) * 4;
-			if (samples > 0)
-			{
-				pDstPixel[0] = static_cast<T>(red / samples);
-				pDstPixel[1] = static_cast<T>(green / samples);
-				pDstPixel[2] = static_cast<T>(blue / samples);
-				pDstPixel[3] = static_cast<T>(alpha / samples);
-			}
-			else
-			{
-				pDstPixel[0] = static_cast<T>(red);
-				pDstPixel[1] = static_cast<T>(green);
-				pDstPixel[2] = static_cast<T>(blue);
-				pDstPixel[3] = static_cast<T>(alpha);
-			}
-		}
-	}
-
-}
-
-//**-------------------------------------------------------------------------
-//** Name: Filter_TwoPass( RGBCOLOR *pSrcImage, 
-//**                       int srcWidth, int srcHeight, 
-//**                       RGBCOLOR *pDestImage, 
-//**                       int destWidth, int destHeight, 
-//**                       double (*FilterProc)(double) )
-//** Returns: 0 on failure and 1 on success
-//** Description: Filters a 2d image with a two pass filter by averaging the
-//**    weighted contributions of the pixels within the filter region.  The
-//**    contributions are determined by a weighting function parameter.
-//**-------------------------------------------------------------------------
-template <typename T>
-int FilterTwoPass(T *pSrcImage, int srcWidth, int srcHeight,
-	T *pDestImage, int destWidth, int destHeight, unsigned int wrapFlags, double(*FilterProc)(double))
-{
-	const int numComponents = 4;
-	FilterWeights *contrib;
-	T *pPixel;
-	T *pTempImage;
-	double dRed;
-	double dGreen;
-	double dBlue;
-	double dAlpha;
-	double filterSize = 3.0;
-
-	int maxDim = (srcWidth>srcHeight) ? srcWidth : srcHeight;
-	contrib = new FilterWeights[maxDim];
-
-	//**------------------------------------------------------------------------
-	//** Need to create a temporary image to stuff the horizontally scaled image
-	//**------------------------------------------------------------------------
-	pTempImage = new T[destWidth * srcHeight * numComponents];
-	if (pTempImage == NULL)
-	{
-		return 0;
-	}
-
-	//**-------------------------------------------------------
-	//** Horizontally filter the image into the temporary image
-	//**-------------------------------------------------------
-	bool bWrapHorizontal = !!(wrapFlags&FILTER_WRAP_X);
-	CalcContributions(srcWidth, destWidth, filterSize, bWrapHorizontal, FilterProc, contrib);
-	for (int iRow = 0; iRow < srcHeight; iRow++)
-	{
-		for (int iCol = 0; iCol < destWidth; iCol++)
-		{
-			dRed = 0;
-			dGreen = 0;
-			dBlue = 0;
-			dAlpha = 0;
-
-			for (int iWeight = 0; iWeight < contrib[iCol].numWeights; iWeight++)
-			{
-				int iSrcCol = iWeight + contrib[iCol].first;
-				if(bWrapHorizontal)
-				{
-					iSrcCol = (iSrcCol < 0)?(srcWidth+iSrcCol):(iSrcCol >= srcWidth)?(iSrcCol-srcWidth):iSrcCol;
-				}
-				T* pSrcPixel = pSrcImage + ((iRow * srcWidth) + iSrcCol)*numComponents;
-				dRed += contrib[iCol].weight[iWeight] * pSrcPixel[0];
-				dGreen += contrib[iCol].weight[iWeight] * pSrcPixel[1];
-				dBlue += contrib[iCol].weight[iWeight] * pSrcPixel[2];
-				dAlpha += contrib[iCol].weight[iWeight] * pSrcPixel[3];
-			}
-
-			pPixel = pTempImage + ((iRow * destWidth) + iCol)*numComponents;
-			pPixel[0] = static_cast<T>(std::max(0.0, std::min(255.0, dRed)));
-			pPixel[1] = static_cast<T>(std::max(0.0, std::min(255.0, dGreen)));
-			pPixel[2] = static_cast<T>(std::max(0.0, std::min(255.0, dBlue)));
-			pPixel[3] = static_cast<T>(std::max(0.0, std::min(255.0, dAlpha)));
-		}
-	}
-
-	//**-------------------------------------------------------
-	//** Vertically filter the image into the destination image
-	//**-------------------------------------------------------
-	bool bWrapVertical = !!(wrapFlags&FILTER_WRAP_Y);
-	CalcContributions(srcHeight, destHeight, filterSize, bWrapVertical, FilterProc, contrib);
-	for (int iCol = 0; iCol < destWidth; iCol++)
-	{
-		for (int iRow = 0; iRow < destHeight; iRow++)
-		{
-			dRed = 0;
-			dGreen = 0;
-			dBlue = 0;
-			dAlpha = 0;
-
-			for (int iWeight = 0; iWeight < contrib[iRow].numWeights; iWeight++)
-			{
-				int iSrcRow = iWeight + contrib[iRow].first;
-				if (bWrapVertical)
-				{
-					iSrcRow = (iSrcRow < 0) ? (srcHeight + iSrcRow) : (iSrcRow >= srcHeight) ? (iSrcRow - srcHeight) : iSrcRow;
-				}
-				T* pSrcPixel = pTempImage + ((iSrcRow * destWidth) + iCol)*numComponents;
-				dRed += contrib[iRow].weight[iWeight] * pSrcPixel[0];
-				dGreen += contrib[iRow].weight[iWeight] * pSrcPixel[1];
-				dBlue += contrib[iRow].weight[iWeight] * pSrcPixel[2];
-				dAlpha += contrib[iRow].weight[iWeight] * pSrcPixel[3];
-			}
-
-			pPixel = pDestImage + ((iRow * destWidth) + iCol)*numComponents;
-			pPixel[0] = static_cast<T>(std::max(0.0, std::min(255.0, dRed)));
-			pPixel[1] = static_cast<T>(std::max(0.0, std::min(255.0, dGreen)));
-			pPixel[2] = static_cast<T>(std::max(0.0, std::min(255.0, dBlue)));
-			pPixel[3] = static_cast<T>(std::max(0.0, std::min(255.0, dAlpha)));
-		}
-	}
-
-	delete[] pTempImage;
-	delete[] contrib;
-
-	return 1;
-}
-
-
-}
-\ No newline at end of file
diff --git a/thirdparty/etc2comp/EtcImage.cpp b/thirdparty/etc2comp/EtcImage.cpp
deleted file mode 100644
index 7a1058844d..0000000000
--- a/thirdparty/etc2comp/EtcImage.cpp
+++ /dev/null
@@ -1,685 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcImage.cpp
-
-Image is an array of 4x4 blocks that represent the encoding of the source image
-
-*/
-
-#include "EtcConfig.h"
-
-#include <stdlib.h>
-
-#include "EtcImage.h"
-
-#include "Etc.h"
-#include "EtcBlock4x4.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcSortedBlockList.h"
-
-#if ETC_WINDOWS
-#include <windows.h>
-#endif
-#include <ctime>
-#include <chrono>
-#include <future>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-// fix conflict with Block4x4::AlphaMix
-#ifdef OPAQUE
-#undef OPAQUE
-#endif
-#ifdef TRANSPARENT
-#undef TRANSPARENT
-#endif
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Image::Image(void)
-	{
-		m_encodingStatus = EncodingStatus::SUCCESS;
-		m_warningsToCapture = EncodingStatus::SUCCESS;
-		m_pafrgbaSource = nullptr;
-
-		m_pablock = nullptr;
-
-		m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
-		m_uiEncodingBitsBytes = 0;
-		m_paucEncodingBits = nullptr;
-
-		m_format = Format::UNKNOWN;
-		m_iNumOpaquePixels = 0;
-		m_iNumTranslucentPixels = 0;
-		m_iNumTransparentPixels = 0;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// constructor using source image
-	// used to set state before Encode() is called
-	//
-	Image::Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth,
-					unsigned int a_uiSourceHeight, 
-					ErrorMetric a_errormetric)
-	{
-		m_encodingStatus = EncodingStatus::SUCCESS;
-		m_warningsToCapture = EncodingStatus::SUCCESS;
-		m_pafrgbaSource = (ColorFloatRGBA *) a_pafSourceRGBA;
-		m_uiSourceWidth = a_uiSourceWidth;
-		m_uiSourceHeight = a_uiSourceHeight;
-
-		m_uiExtendedWidth = CalcExtendedDimension((unsigned short)m_uiSourceWidth);
-		m_uiExtendedHeight = CalcExtendedDimension((unsigned short)m_uiSourceHeight);
-
-		m_uiBlockColumns = m_uiExtendedWidth >> 2;
-		m_uiBlockRows = m_uiExtendedHeight >> 2;
-
-		m_pablock = new Block4x4[GetNumberOfBlocks()];
-		assert(m_pablock);
-
-		m_format = Format::UNKNOWN;
-
-		m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
-		m_uiEncodingBitsBytes = 0;
-		m_paucEncodingBits = nullptr;
-
-		m_errormetric = a_errormetric;
-		m_fEffort = 0.0f;
-
-		m_iEncodeTime_ms = -1;
-
-		m_iNumOpaquePixels = 0;
-		m_iNumTranslucentPixels = 0;
-		m_iNumTransparentPixels = 0;
-		m_bVerboseOutput = false;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// constructor using encoding bits
-	// recreates encoding state using a previously encoded image
-	//
-	Image::Image(Format a_format,
-					unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
-					unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes,
-					Image *a_pimageSource, ErrorMetric a_errormetric)
-	{
-		m_encodingStatus = EncodingStatus::SUCCESS;
-		m_pafrgbaSource = nullptr;
-		m_uiSourceWidth = a_uiSourceWidth;
-		m_uiSourceHeight = a_uiSourceHeight;
-
-		m_uiExtendedWidth = CalcExtendedDimension((unsigned short)m_uiSourceWidth);
-		m_uiExtendedHeight = CalcExtendedDimension((unsigned short)m_uiSourceHeight);
-
-		m_uiBlockColumns = m_uiExtendedWidth >> 2;
-		m_uiBlockRows = m_uiExtendedHeight >> 2;
-
-		unsigned int uiBlocks = GetNumberOfBlocks();
-
-		m_pablock = new Block4x4[uiBlocks];
-		assert(m_pablock);
-
-		m_format = a_format;
-
-		m_iNumOpaquePixels = 0;
-		m_iNumTranslucentPixels = 0;
-		m_iNumTransparentPixels = 0;
-		
-		m_encodingbitsformat = DetermineEncodingBitsFormat(m_format);
-		if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN)
-		{
-			AddToEncodingStatus(ERROR_UNKNOWN_FORMAT);
-			return;
-		}
-		m_uiEncodingBitsBytes = a_uiEncodingBitsBytes;
-		m_paucEncodingBits = a_paucEncidingBits;
-
-		m_errormetric = a_errormetric;
-		m_fEffort = 0.0f;
-		m_bVerboseOutput = false;
-		m_iEncodeTime_ms = -1;
-		
-		unsigned char *paucEncodingBits = m_paucEncodingBits;
-		unsigned int uiEncodingBitsBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
-
-		unsigned int uiH = 0;
-		unsigned int uiV = 0;
-		for (unsigned int uiBlock = 0; uiBlock < uiBlocks; uiBlock++)
-		{
-			m_pablock[uiBlock].InitFromEtcEncodingBits(a_format, uiH, uiV, paucEncodingBits, 
-														a_pimageSource, a_errormetric);
-			paucEncodingBits += uiEncodingBitsBytesPerBlock;
-			uiH += 4;
-			if (uiH >= m_uiSourceWidth)
-			{
-				uiH = 0;
-				uiV += 4;
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	Image::~Image(void)
-	{
-		if (m_pablock != nullptr)
-		{
-			delete[] m_pablock;
-			m_pablock = nullptr;
-		}
-
-		/*if (m_paucEncodingBits != nullptr)
-		{
-			delete[] m_paucEncodingBits;
-			m_paucEncodingBits = nullptr;
-		}*/
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// encode an image
-	// create a set of encoding bits that conforms to a_format
-	// find best fit using a_errormetric
-	// explore a range of possible encodings based on a_fEffort (range = [0:100])
-	// speed up process using a_uiJobs as the number of process threads (a_uiJobs must not excede a_uiMaxJobs)
-	//
-	Image::EncodingStatus Image::Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, unsigned int a_uiJobs, unsigned int a_uiMaxJobs)
-	{
-
-		auto start = std::chrono::steady_clock::now();
-		
-		m_encodingStatus = EncodingStatus::SUCCESS;
-
-		m_format = a_format;
-		m_errormetric = a_errormetric;
-		m_fEffort = a_fEffort;
-
-		if (m_errormetric < 0 || m_errormetric > ERROR_METRICS)
-		{
-			AddToEncodingStatus(ERROR_UNKNOWN_ERROR_METRIC);
-			return m_encodingStatus;
-		}
-
-		if (m_fEffort < ETCCOMP_MIN_EFFORT_LEVEL)
-		{
-			AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE);
-			m_fEffort = ETCCOMP_MIN_EFFORT_LEVEL;
-		}
-		else if (m_fEffort > ETCCOMP_MAX_EFFORT_LEVEL)
-		{
-			AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE);
-			m_fEffort = ETCCOMP_MAX_EFFORT_LEVEL;
-		}
-		if (a_uiJobs < 1)
-		{
-			a_uiJobs = 1;
-			AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE);
-		}
-		else if (a_uiJobs > a_uiMaxJobs)
-		{
-			a_uiJobs = a_uiMaxJobs;
-			AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE);
-		}
-
-		m_encodingbitsformat = DetermineEncodingBitsFormat(m_format);
-
-		if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN)
-		{
-			AddToEncodingStatus(ERROR_UNKNOWN_FORMAT);
-			return m_encodingStatus;
-		}
-
-		assert(m_paucEncodingBits == nullptr);
-		m_uiEncodingBitsBytes = GetNumberOfBlocks() * Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
-		m_paucEncodingBits = new unsigned char[m_uiEncodingBitsBytes];
-
-		InitBlocksAndBlockSorter();
-
-
-		std::future<void> *handle = new std::future<void>[a_uiMaxJobs];
-
-		unsigned int uiNumThreadsNeeded = 0;
-		unsigned int uiUnfinishedBlocks = GetNumberOfBlocks();
-
-		uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? uiUnfinishedBlocks : a_uiJobs;
-			
-		for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
-		{
-			handle[i] = async(std::launch::async, &Image::RunFirstPass, this, i, uiNumThreadsNeeded);
-		}
-
-		RunFirstPass(uiNumThreadsNeeded - 1, uiNumThreadsNeeded);
-
-		for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
-		{
-			handle[i].get();
-		}
-
-		// perform effort-based encoding
-		if (m_fEffort > ETCCOMP_MIN_EFFORT_LEVEL)
-		{
-			unsigned int uiFinishedBlocks = 0;
-			unsigned int uiTotalEffortBlocks = static_cast<unsigned int>(roundf(0.01f * m_fEffort  * GetNumberOfBlocks()));
-
-			if (m_bVerboseOutput)
-			{
-				printf("effortblocks = %d\n", uiTotalEffortBlocks);
-			}
-			unsigned int uiPass = 0;
-			while (1)
-			{
-				if (m_bVerboseOutput)
-				{
-					uiPass++;
-					printf("pass %u\n", uiPass);
-				}
-				m_psortedblocklist->Sort();
-				uiUnfinishedBlocks = m_psortedblocklist->GetNumberOfSortedBlocks();
-				uiFinishedBlocks = GetNumberOfBlocks() - uiUnfinishedBlocks;
-				if (m_bVerboseOutput)
-				{
-					printf("    %u unfinished blocks\n", uiUnfinishedBlocks);
-					// m_psortedblocklist->Print();
-				}
-
-				
-
-				//stop enocding when we did enough to satify the effort percentage
-				if (uiFinishedBlocks >= uiTotalEffortBlocks)
-				{
-					if (m_bVerboseOutput)
-					{
-						printf("Finished %d Blocks out of %d\n", uiFinishedBlocks, uiTotalEffortBlocks);
-					}
-					break;
-				}
-
-				unsigned int uiIteratedBlocks = 0;
-				unsigned int blocksToIterateThisPass = (uiTotalEffortBlocks - uiFinishedBlocks);
-				uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? uiUnfinishedBlocks : a_uiJobs;
-
-				if (uiNumThreadsNeeded <= 1)
-				{
-					//since we already how many blocks each thread will process
-					//cap the thread limit to do the proper amount of work, and not more
-					uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, 0, 1);
-				}
-				else
-				{
-					//we have a lot of work to do, so lets multi thread it
-					std::future<unsigned int> *handleToBlockEncoders = new std::future<unsigned int>[uiNumThreadsNeeded-1];
-
-					for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
-					{
-						handleToBlockEncoders[i] = async(std::launch::async, &Image::IterateThroughWorstBlocks, this, blocksToIterateThisPass, i, uiNumThreadsNeeded);
-					}
-					uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, uiNumThreadsNeeded - 1, uiNumThreadsNeeded);
-
-					for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++)
-					{
-						uiIteratedBlocks += handleToBlockEncoders[i].get();
-					}
-
-					delete[] handleToBlockEncoders;
-				}
-
-				if (m_bVerboseOutput)
-				{
-					printf("    %u iterated blocks\n", uiIteratedBlocks);
-				}
-			}
-		}
-
-		// generate Etc2-compatible bit-format 4x4 blocks
-		for (int i = 0; i < (int)a_uiJobs - 1; i++)
-		{
-			handle[i] = async(std::launch::async, &Image::SetEncodingBits, this, i, a_uiJobs);
-		}
-		SetEncodingBits(a_uiJobs - 1, a_uiJobs);
-
-		for (int i = 0; i < (int)a_uiJobs - 1; i++)
-		{
-			handle[i].get();
-		}
-
-		auto end = std::chrono::steady_clock::now();
-		std::chrono::milliseconds elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
-		m_iEncodeTime_ms = (int)elapsed.count();
-
-		delete[] handle;
-		delete m_psortedblocklist;
-		return m_encodingStatus;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// iterate the encoding thru the blocks with the worst error
-	// stop when a_uiMaxBlocks blocks have been iterated
-	// split the blocks between the process threads using a_uiMultithreadingOffset and a_uiMultithreadingStride
-	//
-	unsigned int Image::IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks, 
-													unsigned int a_uiMultithreadingOffset, 
-													unsigned int a_uiMultithreadingStride)
-	{
-		assert(a_uiMultithreadingStride > 0);
-		unsigned int uiIteratedBlocks = a_uiMultithreadingOffset;
-
-		SortedBlockList::Link *plink = m_psortedblocklist->GetLinkToFirstBlock();
-		for (plink = plink->Advance(a_uiMultithreadingOffset);
-				plink != nullptr;
-				plink = plink->Advance(a_uiMultithreadingStride) )
-		{
-			if (uiIteratedBlocks >= a_uiMaxBlocks)
-			{
-				break;
-			}
-
-			plink->GetBlock()->PerformEncodingIteration(m_fEffort);
-
-			uiIteratedBlocks += a_uiMultithreadingStride;	
-		}
-
-		return uiIteratedBlocks;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// determine which warnings to check for during Encode() based on encoding format
-	//
-	void Image::FindEncodingWarningTypesForCurFormat()
-	{
-		TrackEncodingWarning(WARNING_ALL_TRANSPARENT_PIXELS);
-		TrackEncodingWarning(WARNING_SOME_RGBA_NOT_0_TO_1);
-		switch (m_format)
-		{
-		case Image::Format::ETC1:
-		case Image::Format::RGB8:
-		case Image::Format::SRGB8:
-			TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
-			break;
-
-		case Image::Format::RGB8A1:
-		case Image::Format::SRGB8A1:
-			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
-			TrackEncodingWarning(WARNING_ALL_OPAQUE_PIXELS);
-			break;
-		case Image::Format::RGBA8:
-		case Image::Format::SRGBA8:
-			TrackEncodingWarning(WARNING_ALL_OPAQUE_PIXELS);
-			break;
-
-		case Image::Format::R11:
-		case Image::Format::SIGNED_R11:
-			TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO);
-			TrackEncodingWarning(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
-			break;
-
-		case Image::Format::RG11:
-		case Image::Format::SIGNED_RG11:
-			TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS);
-			TrackEncodingWarning(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
-			break;
-		case Image::Format::FORMATS:
-		case Image::Format::UNKNOWN:
-		default:
-			assert(0);
-			break;
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// examine source pixels to check for warnings
-	//
-	void Image::FindAndSetEncodingWarnings()
-	{
-		int numPixels = (m_uiBlockRows * 4) * (m_uiBlockColumns * 4);
-		if (m_iNumOpaquePixels == numPixels)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_ALL_OPAQUE_PIXELS);
-		}
-		if (m_iNumOpaquePixels < numPixels)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_NON_OPAQUE_PIXELS);
-		}
-		if (m_iNumTranslucentPixels > 0)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_TRANSLUCENT_PIXELS);
-		}
-		if (m_iNumTransparentPixels == numPixels)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_ALL_TRANSPARENT_PIXELS);
-		}
-		if (m_numColorValues.fB > 0.0f)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
-		}
-		if (m_numColorValues.fG > 0.0f) 
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO);
-		}
-
-		if (m_numOutOfRangeValues.fR > 0.0f || m_numOutOfRangeValues.fG > 0.0f)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_RGBA_NOT_0_TO_1);
-		}
-		if (m_numOutOfRangeValues.fB > 0.0f || m_numOutOfRangeValues.fA > 0.0f)
-		{
-			AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_RGBA_NOT_0_TO_1);
-		}
-	}
-	
-	// ----------------------------------------------------------------------------------------------------
-	// return a string name for a given image format
-	//
-	const char * Image::EncodingFormatToString(Image::Format a_format)
-	{
-		switch (a_format)
-		{
-		case Image::Format::ETC1:
-			return "ETC1";
-		case Image::Format::RGB8:
-			return "RGB8";
-		case Image::Format::SRGB8:
-			return "SRGB8";
-
-		case Image::Format::RGB8A1:
-			return "RGB8A1";
-		case Image::Format::SRGB8A1:
-			return "SRGB8A1";
-		case Image::Format::RGBA8:
-			return "RGBA8";
-		case Image::Format::SRGBA8:
-			return "SRGBA8";
-
-		case Image::Format::R11:
-			return "R11";
-		case Image::Format::SIGNED_R11:
-			return "SIGNED_R11";
-
-		case Image::Format::RG11:
-			return "RG11";
-		case Image::Format::SIGNED_RG11:
-			return "SIGNED_RG11";
-		case Image::Format::FORMATS:
-		case Image::Format::UNKNOWN:
-		default:
-			return "UNKNOWN";
-		}
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// return a string name for the image's format
-	//
-	const char * Image::EncodingFormatToString(void)
-	{
-		return EncodingFormatToString(m_format);
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// init image blocks prior to encoding
-	// init block sorter for subsequent sortings
-	// check for encoding warnings
-	//
-	void Image::InitBlocksAndBlockSorter(void)
-	{
-		
-		FindEncodingWarningTypesForCurFormat();
-
-		// init each block
-		Block4x4 *pblock = m_pablock;
-		unsigned char *paucEncodingBits = m_paucEncodingBits;
-		for (unsigned int uiBlockRow = 0; uiBlockRow < m_uiBlockRows; uiBlockRow++)
-		{
-			unsigned int uiBlockV = uiBlockRow * 4;
-
-			for (unsigned int uiBlockColumn = 0; uiBlockColumn < m_uiBlockColumns; uiBlockColumn++)
-			{
-				unsigned int uiBlockH = uiBlockColumn * 4;
-
-				pblock->InitFromSource(this, uiBlockH, uiBlockV, paucEncodingBits, m_errormetric);
-
-				paucEncodingBits += Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
-
-				pblock++;
-			}
-		}
-
-		FindAndSetEncodingWarnings();
-
-		// init block sorter
-		{
-			m_psortedblocklist = new SortedBlockList(GetNumberOfBlocks(), 100);
-
-			for (unsigned int uiBlock = 0; uiBlock < GetNumberOfBlocks(); uiBlock++)
-			{
-				pblock = &m_pablock[uiBlock];
-				m_psortedblocklist->AddBlock(pblock);
-			}
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// run the first pass of the encoder
-	// the encoder generally finds a reasonable, fast encoding
-	// this is run on all blocks regardless of effort to ensure that all blocks have a valid encoding
-	//
-	void Image::RunFirstPass(unsigned int a_uiMultithreadingOffset, unsigned int a_uiMultithreadingStride)
-	{
-		assert(a_uiMultithreadingStride > 0);
-
-		for (unsigned int uiBlock = a_uiMultithreadingOffset;
-				uiBlock < GetNumberOfBlocks(); 
-				uiBlock += a_uiMultithreadingStride)
-		{
-			Block4x4 *pblock = &m_pablock[uiBlock];
-			pblock->PerformEncodingIteration(m_fEffort);
-		}
-	}
-
-    // ----------------------------------------------------------------------------------------------------
-	// set the encoding bits (for the output file) based on the best encoding for each block
-	//
-	void Image::SetEncodingBits(unsigned int a_uiMultithreadingOffset,
-								unsigned int a_uiMultithreadingStride)
-	{
-		assert(a_uiMultithreadingStride > 0);
-
-		for (unsigned int uiBlock = a_uiMultithreadingOffset; 
-				uiBlock < GetNumberOfBlocks(); 
-				uiBlock += a_uiMultithreadingStride)
-		{
-			Block4x4 *pblock = &m_pablock[uiBlock];
-			pblock->SetEncodingBitsFromEncoding();
-		}
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// return the image error
-	// image error is the sum of all block errors
-	//
-	float Image::GetError(void)
-	{
-		float fError = 0.0f;
-
-		for (unsigned int uiBlock = 0; uiBlock < GetNumberOfBlocks(); uiBlock++)
-		{
-			Block4x4 *pblock = &m_pablock[uiBlock];
-			fError += pblock->GetError();
-		}
-
-		return fError;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// determine the encoding bits format based on the encoding format
-	// the encoding bits format is a family of bit encodings that are shared across various encoding formats
-	//
-	Block4x4EncodingBits::Format Image::DetermineEncodingBitsFormat(Format a_format)
-	{
-		Block4x4EncodingBits::Format encodingbitsformat;
-
-		// determine encoding bits format from image format
-		switch (a_format)
-		{
-		case Format::ETC1:
-		case Format::RGB8:
-		case Format::SRGB8:
-			encodingbitsformat = Block4x4EncodingBits::Format::RGB8;
-			break;
-
-		case Format::RGBA8:
-		case Format::SRGBA8:
-			encodingbitsformat = Block4x4EncodingBits::Format::RGBA8;
-			break;
-
-		case Format::R11:
-		case Format::SIGNED_R11:
-			encodingbitsformat = Block4x4EncodingBits::Format::R11;
-			break;
-
-		case Format::RG11:
-		case Format::SIGNED_RG11:
-			encodingbitsformat = Block4x4EncodingBits::Format::RG11;
-			break;
-
-		case Format::RGB8A1:
-		case Format::SRGB8A1:
-			encodingbitsformat = Block4x4EncodingBits::Format::RGB8A1;
-			break;
-
-		default:
-			encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
-			break;
-		}
-
-		return encodingbitsformat;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-}	// namespace Etc
diff --git a/thirdparty/etc2comp/EtcImage.h b/thirdparty/etc2comp/EtcImage.h
deleted file mode 100644
index bd807ac32e..0000000000
--- a/thirdparty/etc2comp/EtcImage.h
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-//#include "Etc.h"
-#include "EtcColorFloatRGBA.h"
-#include "EtcBlock4x4EncodingBits.h"
-#include "EtcErrorMetric.h"
-
-
-namespace Etc
-{
-	class Block4x4;
-	class EncoderSpec;
-	class SortedBlockList;
-
-    class Image
-    {
-    public:
-
-		//the differnt warning and errors that can come up during encoding
-		enum  EncodingStatus
-		{
-			SUCCESS = 0,
-			//
-			WARNING_THRESHOLD = 1 << 0,
-			//
-			WARNING_EFFORT_OUT_OF_RANGE = 1 << 1,
-			WARNING_JOBS_OUT_OF_RANGE = 1 << 2,
-			WARNING_SOME_NON_OPAQUE_PIXELS = 1 << 3,//just for opaque formats, etc1, rgb8, r11, rg11
-			WARNING_ALL_OPAQUE_PIXELS = 1 << 4,
-			WARNING_ALL_TRANSPARENT_PIXELS = 1 << 5,
-			WARNING_SOME_TRANSLUCENT_PIXELS = 1 << 6,//just for rgb8A1
-			WARNING_SOME_RGBA_NOT_0_TO_1 = 1 << 7,
-			WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO = 1 << 8,
-			WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO = 1 << 9,
-			//
-			ERROR_THRESHOLD = 1 << 16,
-			//
-			ERROR_UNKNOWN_FORMAT = 1 << 17,
-			ERROR_UNKNOWN_ERROR_METRIC = 1 << 18,
-			ERROR_ZERO_WIDTH_OR_HEIGHT = 1 << 19,
-			//
-		};
-		
-		enum class Format
-		{
-			UNKNOWN,
-			//
-			ETC1,
-			//
-			// ETC2 formats
-			RGB8,
-			SRGB8,
-			RGBA8,
-			SRGBA8,
-			R11,
-			SIGNED_R11,
-			RG11,
-			SIGNED_RG11,
-			RGB8A1,
-			SRGB8A1,
-			//
-			FORMATS,
-			//
-			DEFAULT = SRGB8
-		};
-
-		// constructor using source image
-		Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth,
-				unsigned int a_uiSourceHeight,
-				ErrorMetric a_errormetric);
-
-		// constructor using encoding bits
-		Image(Format a_format, 
-				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
-				unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes,
-				Image *a_pimageSource,
-				ErrorMetric a_errormetric);
-
-		~Image(void);
-
-		EncodingStatus Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, 
-			unsigned int a_uiJobs, unsigned int a_uiMaxJobs);
-
-		inline void AddToEncodingStatus(EncodingStatus a_encStatus)
-		{
-			m_encodingStatus = (EncodingStatus)((unsigned int)m_encodingStatus | (unsigned int)a_encStatus);
-		}
-		
-		inline unsigned int GetSourceWidth(void)
-		{
-			return m_uiSourceWidth;
-		}
-
-		inline unsigned int GetSourceHeight(void)
-		{
-			return m_uiSourceHeight;
-		}
-
-		inline unsigned int GetExtendedWidth(void)
-		{
-			return m_uiExtendedWidth;
-		}
-
-		inline unsigned int GetExtendedHeight(void)
-		{
-			return m_uiExtendedHeight;
-		}
-
-		inline unsigned int GetNumberOfBlocks()
-		{
-			return m_uiBlockColumns * m_uiBlockRows;
-		}
-
-		inline Block4x4 * GetBlocks()
-		{
-			return m_pablock;
-		}
-
-		inline unsigned char * GetEncodingBits(void)
-		{
-			return m_paucEncodingBits;
-		}
-
-		inline unsigned int GetEncodingBitsBytes(void)
-		{
-			return m_uiEncodingBitsBytes;
-		}
-
-		inline int GetEncodingTimeMs(void)
-		{
-			return m_iEncodeTime_ms;
-		}
-
-		float GetError(void);
-
-		inline ColorFloatRGBA * GetSourcePixel(unsigned int a_uiH, unsigned int a_uiV)
-		{
-			if (a_uiH >= m_uiSourceWidth || a_uiV >= m_uiSourceHeight)
-			{
-				return nullptr;
-			}
-
-			return &m_pafrgbaSource[a_uiV*m_uiSourceWidth + a_uiH];
-		}
-
-		inline Format GetFormat(void)
-		{
-			return m_format;
-		}
-
-		static Block4x4EncodingBits::Format DetermineEncodingBitsFormat(Format a_format);
-
-		inline static unsigned short CalcExtendedDimension(unsigned short a_ushOriginalDimension)
-		{
-			return (unsigned short)((a_ushOriginalDimension + 3) & ~3);
-		}
-
-		inline ErrorMetric GetErrorMetric(void)
-		{
-			return m_errormetric;
-		}
-
-		static const char * EncodingFormatToString(Image::Format a_format);
-		const char * EncodingFormatToString(void);
-		//used to get basic information about the image data
-		int m_iNumOpaquePixels;
-		int m_iNumTranslucentPixels;
-		int m_iNumTransparentPixels;
-
-		ColorFloatRGBA m_numColorValues;
-		ColorFloatRGBA m_numOutOfRangeValues;
-
-		bool m_bVerboseOutput;
-	private:
-		//add a warning or error to check for while encoding
-		inline void TrackEncodingWarning(EncodingStatus a_encStatus)
-		{
-			m_warningsToCapture = (EncodingStatus)((unsigned int)m_warningsToCapture | (unsigned int)a_encStatus);
-		}
-
-		//report the warning if it is something we care about for this encoding
-		inline void AddToEncodingStatusIfSignfigant(EncodingStatus a_encStatus)
-		{
-			if ((EncodingStatus)((unsigned int)m_warningsToCapture & (unsigned int)a_encStatus) == a_encStatus)
-			{
-				AddToEncodingStatus(a_encStatus);
-			}
-		}
-
-		Image(void);
-		void FindEncodingWarningTypesForCurFormat();
-		void FindAndSetEncodingWarnings();
-
-		void InitBlocksAndBlockSorter(void);
-
-		void RunFirstPass(unsigned int a_uiMultithreadingOffset, 
-							unsigned int a_uiMultithreadingStride);
-
-		void SetEncodingBits(unsigned int a_uiMultithreadingOffset,
-								unsigned int a_uiMultithreadingStride);
-
-		unsigned int IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks,
-												unsigned int a_uiMultithreadingOffset,
-												unsigned int a_uiMultithreadingStride);
-
-		// inputs
-		ColorFloatRGBA *m_pafrgbaSource;
-		unsigned int m_uiSourceWidth;
-		unsigned int m_uiSourceHeight;
-		unsigned int m_uiExtendedWidth;
-		unsigned int m_uiExtendedHeight;
-		unsigned int m_uiBlockColumns;
-		unsigned int m_uiBlockRows;
-		// intermediate data
-		Block4x4 *m_pablock;
-		// encoding
-		Format m_format;
-		Block4x4EncodingBits::Format m_encodingbitsformat;
-		unsigned int m_uiEncodingBitsBytes;		// for entire image
-		unsigned char *m_paucEncodingBits;
-		ErrorMetric m_errormetric;
-		float m_fEffort;
-		// stats
-		int m_iEncodeTime_ms;
-		
-		SortedBlockList *m_psortedblocklist;
-		//this will hold any warning or errors that happen during encoding
-		EncodingStatus m_encodingStatus;
-		//these will be the warnings we are tracking
-		EncodingStatus m_warningsToCapture;
-	};
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcIndividualTrys.cpp b/thirdparty/etc2comp/EtcIndividualTrys.cpp
deleted file mode 100644
index 56ff4c65ec..0000000000
--- a/thirdparty/etc2comp/EtcIndividualTrys.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcIndividualTrys.cpp
-
-Gathers the results of the various encoding trys for both halves of a 4x4 block for Individual mode
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcIndividualTrys.h"
-
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// construct a list of trys (encoding attempts)
-	//
-	// a_frgbaColor1 is the basecolor for the first half
-	// a_frgbaColor2 is the basecolor for the second half
-	// a_pauiPixelMapping1 is the pixel order for the first half
-	// a_pauiPixelMapping2 is the pixel order for the second half
-	// a_uiRadius is the amount to vary the base colors
-	//
-	IndividualTrys::IndividualTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2,
-									const unsigned int *a_pauiPixelMapping1,
-									const unsigned int *a_pauiPixelMapping2,
-									unsigned int a_uiRadius)
-	{
-		assert(a_uiRadius <= MAX_RADIUS);
-
-		ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR4G4B4();
-		ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR4G4B4();
-
-		// quantize base colors
-		// ensure that trys with a_uiRadius don't overflow
-		int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(15.0f), a_uiRadius);
-		int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(15.0f), a_uiRadius);
-		int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(15.0f), a_uiRadius);
-		int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(15.0f), a_uiRadius);
-		int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(15.0f), a_uiRadius);
-		int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(15.0f), a_uiRadius);
-
-		m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius);
-		m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius);
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	void IndividualTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue,
-									const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius)
-	{
-
-		m_iRed = a_iRed;
-		m_iGreen = a_iGreen;
-		m_iBlue = a_iBlue;
-
-		m_pauiPixelMapping = a_pauiPixelMapping;
-		m_uiRadius = a_uiRadius;
-
-		m_uiTrys = 0;
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcIndividualTrys.h b/thirdparty/etc2comp/EtcIndividualTrys.h
deleted file mode 100644
index 5fb12fbcf4..0000000000
--- a/thirdparty/etc2comp/EtcIndividualTrys.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "EtcColorFloatRGBA.h"
-
-namespace Etc
-{
-
-	class IndividualTrys
-	{
-	public:
-
-		static const unsigned int MAX_RADIUS = 1;
-
-		IndividualTrys(ColorFloatRGBA a_frgbaColor1,
-						ColorFloatRGBA a_frgbaColor2,
-						const unsigned int *a_pauiPixelMapping1,
-						const unsigned int *a_pauiPixelMapping2,
-						unsigned int a_uiRadius);
-
-		inline static int MoveAwayFromEdge(int a_i, int a_iDistance)
-		{
-			if (a_i < (0+ a_iDistance))
-			{
-				return (0 + a_iDistance);
-			}
-			else if (a_i > (15- a_iDistance))
-			{
-				return (15 - a_iDistance);
-			}
-
-			return a_i;
-		}
-
-		class Try
-		{
-        public :
-			static const unsigned int SELECTORS = 8;	// per half
-
-			int m_iRed;
-			int m_iGreen;
-			int m_iBlue;
-			unsigned int m_uiCW;
-			unsigned int m_auiSelectors[SELECTORS];
-			float m_fError;
-        };
-
-		class Half
-		{
-		public:
-
-			static const unsigned int MAX_TRYS = 27;
-
-			void Init(int a_iRed, int a_iGreen, int a_iBlue, 
-						const unsigned int *a_pauiPixelMapping,
-						unsigned int a_uiRadius);
-
-			// center of trys
-			int m_iRed;
-			int m_iGreen;
-			int m_iBlue;
-
-			const unsigned int *m_pauiPixelMapping;
-			unsigned int m_uiRadius;
-
-			unsigned int m_uiTrys;
-			Try m_atry[MAX_TRYS];
-
-			Try *m_ptryBest;
-		};
-
-		Half m_half1;
-		Half m_half2;
-
-	};
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcMath.cpp b/thirdparty/etc2comp/EtcMath.cpp
deleted file mode 100644
index 096d5f7ab9..0000000000
--- a/thirdparty/etc2comp/EtcMath.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "EtcConfig.h"
-#include "EtcMath.h"
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// calculate the line that best fits the set of XY points contained in a_afX[] and a_afY[]
-	// use a_fSlope and a_fOffset to define that line
-	//
-	bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
-					float *a_fSlope, float *a_fOffset)
-	{
-		float fPoints = (float)a_Points;
-
-		float fSumX = 0.0f;
-		float fSumY = 0.0f;
-		float fSumXY = 0.0f;
-		float fSumX2 = 0.0f;
-
-		for (unsigned int uiPoint = 0; uiPoint < a_Points; uiPoint++)
-		{
-			fSumX += a_afX[uiPoint];
-			fSumY += a_afY[uiPoint];
-			fSumXY += a_afX[uiPoint] * a_afY[uiPoint];
-			fSumX2 += a_afX[uiPoint] * a_afX[uiPoint];
-		}
-
-		float fDivisor = fPoints*fSumX2 - fSumX*fSumX;
-
-		// if vertical line
-		if (fDivisor == 0.0f)
-		{
-			*a_fSlope = 0.0f;
-			*a_fOffset = 0.0f;
-			return true;
-		}
-
-		*a_fSlope = (fPoints*fSumXY - fSumX*fSumY) / fDivisor;
-		*a_fOffset = (fSumY - (*a_fSlope)*fSumX) / fPoints;
-
-		return false;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/EtcMath.h b/thirdparty/etc2comp/EtcMath.h
deleted file mode 100644
index c58c9a91bc..0000000000
--- a/thirdparty/etc2comp/EtcMath.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <math.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// return true if vertical line
-	bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
-					float *a_fSlope, float *a_fOffset);
-
-	inline float ConvertMSEToPSNR(float a_fMSE)
-	{
-		if (a_fMSE == 0.0f)
-		{
-			return INFINITY;
-		}
-
-		return 10.0f * log10f(1.0f / a_fMSE);
-	}
-
-
-}
diff --git a/thirdparty/etc2comp/EtcSortedBlockList.cpp b/thirdparty/etc2comp/EtcSortedBlockList.cpp
deleted file mode 100644
index bfa6b7b3fa..0000000000
--- a/thirdparty/etc2comp/EtcSortedBlockList.cpp
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-EtcSortedBlockList.cpp
-
-SortedBlockList is a list of 4x4 blocks that can be used by the "effort" system to prioritize
-the encoding of the 4x4 blocks.
-
-The sorting is done with buckets, where each bucket is an indication of how much error each 4x4 block has
-
-*/
-
-#include "EtcConfig.h"
-#include "EtcSortedBlockList.h"
-
-#include "EtcBlock4x4.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-namespace Etc
-{
-
-	// ----------------------------------------------------------------------------------------------------
-	// construct an empty list
-	//
-	// allocate enough memory to add all of the image's 4x4 blocks later
-	// allocate enough buckets to sort the blocks
-	//
-	SortedBlockList::SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets)
-	{
-		m_uiImageBlocks = a_uiImageBlocks;
-		m_iBuckets = (int)a_uiBuckets;
-
-		m_uiAddedBlocks = 0;
-		m_uiSortedBlocks = 0;
-		m_palinkPool = new Link[m_uiImageBlocks];
-		m_pabucket = new Bucket[m_iBuckets];
-		m_fMaxError = 0.0f;
-
-		InitBuckets();
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	//
-	SortedBlockList::~SortedBlockList(void)
-	{
-		delete[] m_palinkPool;
-		delete[] m_pabucket;
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-    // add a 4x4 block to the list
-	// the 4x4 block will be sorted later
-	//
-    void SortedBlockList::AddBlock(Block4x4 *a_pblock)
-    {
-        assert(m_uiAddedBlocks < m_uiImageBlocks);
-        Link *plink = &m_palinkPool[m_uiAddedBlocks++];
-		plink->Init(a_pblock);
-    }
-
-	// ----------------------------------------------------------------------------------------------------
-	// sort all of the 4x4 blocks that have been added to the list
-	//
-	// first, determine the maximum error, then assign an error range to each bucket
-	// next, determine which bucket each 4x4 block belongs to based on the 4x4 block's error
-	// add the 4x4 block to the appropriate bucket
-	// lastly, walk thru the buckets and add each bucket to a sorted linked list
-	//
-	// the resultant sorting is an approximate sorting from most to least error
-	//
-    void SortedBlockList::Sort(void)
-    {
-		assert(m_uiAddedBlocks == m_uiImageBlocks);
-        InitBuckets();
-
-        // find max block error
-        m_fMaxError = -1.0f;
-
-        for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++)
-        {
-            Link *plinkBlock = &m_palinkPool[uiLink];
-
-            float fBlockError = plinkBlock->GetBlock()->GetError();
-            if (fBlockError > m_fMaxError)
-            {
-                m_fMaxError = fBlockError;
-            }
-        }
-        // prevent divide by zero or divide by negative
-        if (m_fMaxError <= 0.0f)
-        {
-            m_fMaxError = 1.0f;
-        }
-		//used for debugging
-		//int numDone = 0;
-        // put all of the blocks with unfinished encodings into the appropriate bucket
-		m_uiSortedBlocks = 0;
-        for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++)
-        {
-            Link *plinkBlock = &m_palinkPool[uiLink];
-
-			// if the encoding is done, don't add it to the list
-			if (plinkBlock->GetBlock()->GetEncoding()->IsDone())
-			{
-				//numDone++;
-				continue;
-			}
-
-            // calculate the appropriate sort bucket
-            float fBlockError = plinkBlock->GetBlock()->GetError();
-            int iBucket = (int) floorf(m_iBuckets * fBlockError / m_fMaxError);
-            // clamp to bucket index
-            iBucket = iBucket < 0 ? 0 : iBucket >= m_iBuckets ? m_iBuckets - 1 : iBucket;
-
-            // add block to bucket
-			{
-				Bucket *pbucket = &m_pabucket[iBucket];
-				if (pbucket->plinkLast)
-				{
-					pbucket->plinkLast->SetNext(plinkBlock);
-					pbucket->plinkLast = plinkBlock;
-				}
-				else
-				{
-					pbucket->plinkFirst = pbucket->plinkLast = plinkBlock;
-				}
-				plinkBlock->SetNext(nullptr);
-			}
-
-			m_uiSortedBlocks++;
-
-            if (0)
-            {
-                printf("%u: e=%.3f\n", uiLink, fBlockError);
-                Print();
-                printf("\n\n\n");
-            }
-        }
-		//printf("num blocks already done: %d\n",numDone);
-		//link the blocks together across buckets
-		m_plinkFirst = nullptr;
-		m_plinkLast = nullptr;
-		for (int iBucket = m_iBuckets - 1; iBucket >= 0; iBucket--)
-		{
-			Bucket *pbucket = &m_pabucket[iBucket];
-
-			if (pbucket->plinkFirst)
-			{
-				if (m_plinkFirst == nullptr)
-				{
-					m_plinkFirst = pbucket->plinkFirst;
-				}
-				else
-				{
-					assert(pbucket->plinkLast->GetNext() == nullptr);
-					m_plinkLast->SetNext(pbucket->plinkFirst);
-				}
-
-				m_plinkLast = pbucket->plinkLast;
-			}
-		}
-
-
-	}
-
-	// ----------------------------------------------------------------------------------------------------
-	// clear all of the buckets.  normally done in preparation for a sort
-	//
-	void SortedBlockList::InitBuckets(void)
-    {
-        for (int iBucket = 0; iBucket < m_iBuckets; iBucket++)
-        {
-            Bucket *pbucket = &m_pabucket[iBucket];
-
-            pbucket->plinkFirst = 0;
-            pbucket->plinkLast = 0;
-        }
-    }
-
-    // ----------------------------------------------------------------------------------------------------
-    // print out the list of sorted 4x4 blocks
-	// normally used for debugging
-	//
-    void SortedBlockList::Print(void)
-    {
-        for (int iBucket = m_iBuckets-1; iBucket >= 0; iBucket--)
-        {
-            Bucket *pbucket = &m_pabucket[iBucket];
-
-            unsigned int uiBlocks = 0;
-            for (Link *plink = pbucket->plinkFirst; plink != nullptr; plink = plink->GetNext() )
-            {
-                uiBlocks++;
-
-				if (plink == pbucket->plinkLast)
-				{
-					break;
-				}
-            }
-
-            float fBucketError = m_fMaxError * iBucket / m_iBuckets;
-            float fBucketRMS = sqrtf(fBucketError / (4.0f*16.0f) );
-            printf("%3d: e=%.3f rms=%.6f %u\n", iBucket, fBucketError, fBucketRMS, uiBlocks);
-        }
-    }
-
-    // ----------------------------------------------------------------------------------------------------
-    //
-
-}   // namespace Etc
diff --git a/thirdparty/etc2comp/EtcSortedBlockList.h b/thirdparty/etc2comp/EtcSortedBlockList.h
deleted file mode 100644
index 960e8adc34..0000000000
--- a/thirdparty/etc2comp/EtcSortedBlockList.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright 2015 The Etc2Comp Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-namespace Etc
-{
-	class Block4x4;
-
-    class SortedBlockList
-    {
-    public:
-
-		class Link
-		{
-		public:
-
-			inline void Init(Block4x4 *a_pblock)
-			{
-				m_pblock = a_pblock;
-				m_plinkNext = nullptr;
-			}
-
-			inline Block4x4 * GetBlock(void)
-			{
-				return m_pblock;
-			}
-
-			inline void SetNext(Link *a_plinkNext)
-			{
-				m_plinkNext = a_plinkNext;
-			}
-
-			inline Link * GetNext(void)
-			{
-				return m_plinkNext;
-			}
-
-			inline Link * Advance(unsigned int a_uiSteps = 1)
-			{
-				Link *plink = this;
-
-				for (unsigned int uiStep = 0; uiStep < a_uiSteps; uiStep++)
-				{
-					if (plink == nullptr)
-					{
-						break;
-					}
-
-					plink = plink->m_plinkNext;
-				}
-
-				return plink;
-			}
-
-		private:
-
-			Block4x4 *m_pblock;
-			Link *m_plinkNext;
-		};
-
-		SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets);
-		~SortedBlockList(void);
-
-        void AddBlock(Block4x4 *a_pblock);
-
-        void Sort(void);
-
-		inline Link * GetLinkToFirstBlock(void)
-		{
-			return m_plinkFirst;
-		}
-
-		inline unsigned int GetNumberOfAddedBlocks(void)
-		{
-			return m_uiAddedBlocks;
-		}
-
-		inline unsigned int GetNumberOfSortedBlocks(void)
-		{
-			return m_uiSortedBlocks;
-		}
-
-		void Print(void);
-
-	private:
-
-        void InitBuckets(void);
-
-        class Bucket
-        {
-        public:
-            Link *plinkFirst;
-            Link *plinkLast;
-        };
-
-        unsigned int m_uiImageBlocks;
-        int m_iBuckets;
-
-		unsigned int m_uiAddedBlocks;
-		unsigned int m_uiSortedBlocks;
-		Link *m_palinkPool;
-        Bucket *m_pabucket;
-        float m_fMaxError;
-
-		Link *m_plinkFirst;
-		Link *m_plinkLast;
-
-    };
-
-} // namespace Etc
diff --git a/thirdparty/etc2comp/LICENSE b/thirdparty/etc2comp/LICENSE
deleted file mode 100644
index d645695673..0000000000
--- a/thirdparty/etc2comp/LICENSE
+++ /dev/null
@@ -1,202 +0,0 @@
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/thirdparty/etc2comp/README.md b/thirdparty/etc2comp/README.md
deleted file mode 100644
index 2f4363d042..0000000000
--- a/thirdparty/etc2comp/README.md
+++ /dev/null
@@ -1,197 +0,0 @@
-# Etc2Comp - Texture to ETC2 compressor
-
-Etc2Comp is a command line tool that converts textures (e.g. bitmaps)
-into the [ETC2](https://en.wikipedia.org/wiki/Ericsson_Texture_Compression)
-format. The tool is built with a focus on encoding performance
-to reduce the amount of time required to compile asset heavy applications as
-well as reduce overall application size.
-
-This repo provides source code that can be compiled into a binary. The
-binary can then be used to convert textures to the ETC2 format.
-
-Important: This is not an official Google product. It is an experimental
-library published as-is. Please see the CONTRIBUTORS.md file for information
-about questions or issues.
-
-## Setup
-This project uses [CMake](https://cmake.org/) to generate platform-specific
-build files:
- - Linux: make files
- - OS X: Xcode workspace files
- - Microsoft Windows: Visual Studio solution files
- - Note: CMake supports other formats, but this doc only provides steps for
- one of each platform for brevity.
-
-Refer to each platform's setup section to setup your environment and build
-an Etc2Comp binary. Then skip to the usage section of this page for examples
-of how to use the library.
-
-### Setup for OS X
- build tested on this config:
-  OS X 10.9.5 i7 16GB RAM
-  Xcode 5.1.1
-  cmake 3.2.3
-  
-Start by downloading and installing the following components if they are not
-already installed on your development machine.
- - *Xcode* version 5.1.1, or greater
- - [CMake](https://cmake.org/download/) version 3.2.3, or greater
-
-To build the Etc2Comp binary:
- 1. Open a *Terminal* window and navigate to the project directory.
- 1. Run `mkdir build_xcode`
- 1. Run `cd build_xcode`
- 1. Run `cmake -G Xcode ../`
- 1. Open *Xcode* and import the `build_xcode/EtcTest.xcodeproj` file.
- 1. Open the Product menu and choose Build For -> Running.
- 1. Once the build succeeds the binary located at `build_xcode/EtcTool/Debug/EtcTool`
-can be executed.
-
-Optional
-Xcode EtcTool ‘Run’ preferences
-note: if the build_xcode/EtcTest.xcodeproj is manually deleted then some Xcode preferences 
-will need to be set by hand after cmake is run (these prefs are retained across 
-cmake updates if the .xcodeproj is not deleted/removed)
-
-1. Set the active scheme to ‘EtcTool’
-1. Edit the scheme
-1. Select option ‘Run EtcTool’, then tab ‘Arguments’. 
-Add this launch argument: ‘-argfile ../../EtcTool/args.txt’
-1. Select tab ‘Options’ and set a custom working directory to: ‘$(SRCROOT)/Build_Xcode/EtcTool’
-
-### SetUp for Windows
-
-1. Open a *Terminal* window and navigate to the project directory.
-1. Run `mkdir build_vs`
-1. Run `cd build_vs`
-1. Run CMAKE, noting what build version you need, and pointing to the parent directory as the source root; 
-  For VS 2013 : `cmake -G "Visual Studio 12 2013 Win64" ../`
-  For VS 2015 : `cmake -G "Visual Studio 14 2015 Win64" ../`
-  NOTE: To see what supported Visual Studio outputs there are, run `cmake -G`
-1. open the 'EtcTest' solution
-1. make the 'EtcTool' project the start up project 
-1. (optional) in the project properties, under 'Debugging ->command arguments' 
-add the argfile textfile thats included in the EtcTool directory. 
-example: -argfile C:\etc2\EtcTool\Args.txt
-
-### Setup For Linux
-The Linux build was tested on this config:
-  Ubuntu desktop 14.04
-  gcc/g++ 4.8
-  cmake 2.8.12.2
-
-1. Verify linux has cmake and C++-11 capable g++ installed
-1. Open shell
-1. Run `mkdir build_linux`
-1. Run `cd build_linux`
-1. Run `cmake ../`
-1. Run `make`
-1. navigate to the newly created EtcTool directory `cd EtcTool`
-1. run the executable: `./EtcTool -argfile ../../EtcTool/args.txt`
-
-Skip to the <a href="#usage">Usage</a> section for more information about using the
-tool.
-
-## Usage
-
-### Command Line Usage
-EtcTool can be run from the command line with the following usage:
-    etctool.exe source_image [options ...] -output encoded_image
-
-The encoder will use an array of RGBA floats read from the source_image to create 
-an ETC1 or ETC2 encoded image in encoded_image.  The RGBA floats should be in the 
-range [0:1].
-
-Options:
-
-    -analyze <analysis_folder>
-    -argfile <arg_file>           additional command line arguments read from a file
-    -blockAtHV <H V>              encodes a single block that contains the
-                                  pixel specified by the H V coordinates
-    -compare <comparison_image>   compares source_image to comparison_image
-    -effort <amount>              number between 0 and 100 to specify the encoding quality 
-                                  (100 is the highest quality)
-    -errormetric <error_metric>   specify the error metric, the options are
-                                  rgba, rgbx, rec709, numeric and normalxyz
-    -format <etc_format>          ETC1, RGB8, SRGB8, RGBA8, SRGB8, RGB8A1,
-                                  SRGB8A1 or R11
-    -help                         prints this message
-    -jobs or -j <thread_count>    specifies the number of threads (default=1)
-    -normalizexyz                 normalize RGB to have a length of 1
-    -verbose or -v                shows status information during the encoding
-                                  process
-	-mipmaps or -m <mip_count>    sets the maximum number of mipaps to generate (default=1)
-	-mipwrap or -w <x|y|xy>       sets the mipmap filter wrap mode (default=clamp)
-
-* -analyze will run an analysis of the encoding and place it in folder 
-"analysis_folder" (e.g. ../analysis/kodim05).  within the analysis_folder, a folder 
-will be created with a name of the current date/time (e.g. 20151204_153306).  this 
-date/time folder is used to compare encodings of the same texture over time.  
-within the date/time folder is a text file with several encoding stats and a 2x png 
-image showing the encoding mode for each 4x4 block.
-
-* -argfile allows additional command line arguments to be placed in a text file
-
-* -blockAtHV selects the 4x4 pixel subset of the source image at position (H,V).  
-This is mainly used for debugging
-
-* -compare compares the source image to the created encoded image. The encoding
-will dictate what error analysis is used in the comparison.
-
-* -effort uses an "amount" between 0 and 100 to determine how much additional effort 
-to apply during the encoding.
-
-* -errormetric selects the fitting algorithm used by the encoder.  "rgba" calculates 
-RMS error using RGB components that are weighted by A.  "rgbx" calculates RMS error 
-using RGBA components, where A is treated as an additional data channel, instead of 
-as alpha.  "rec709" is similar to "rgba", except the RGB components are also weighted 
-according to Rec709.  "numeric" calculates RMS error using unweighted RGBA components.  
-"normalize" calculates error based on dot product and vector length for RGB and RMS 
-error for A.
-
-* -help prints out the usage message
-
-* -jobs enables multi-threading to speed up image encoding
-
-* -normalizexyz normalizes the source RGB to have a length of 1.
-
-* -verbose shows information on the current encoding process. It will then display the 
-PSNR and time time it took to encode the image.
-
-* -mipmaps takes an argument that specifies how many mipmaps to generate from the 
-source image.  The mipmaps are generated with a lanczos3 filter using edge clamping.
-If the mipmaps option is not specified no mipmaps are created.
-
-* -mipwrap takes an argument that specifies the mipmap filter wrap mode.  The options 
-are "x", "y" and "xy" which specify wrapping in x only, y only or x and y respectively.
-The default options are clamping in both x and y.
-
-Note: Path names can use slashes or backslashes.  The tool will convert the 
-slashes to the appropriate polarity for the current platform.
-
-
-## API
-
-The library supports two different APIs - a C-like API that is not heavily 
-class-based and a class-based API.
-
-main() in EtcTool.cpp contains an example of both APIs.
-
-The Encode() method now returns an EncodingStatus that contains bit flags for
-reporting various warnings and flags encountered when encoding.
-
-
-## Copyright
-Copyright 2015 Etc2Comp Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/thirdparty/etc2comp/patches/fix-rgba8-max-channels.patch b/thirdparty/etc2comp/patches/fix-rgba8-max-channels.patch
deleted file mode 100644
index ea9b5640b6..0000000000
--- a/thirdparty/etc2comp/patches/fix-rgba8-max-channels.patch
+++ /dev/null
@@ -1,224 +0,0 @@
-diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
-index 5656556db9..5c7ebed788 100644
---- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
-+++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp
-@@ -508,7 +508,7 @@ namespace Etc
- 		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
- 		if (iMaxRed1 > 15)
- 		{
--			iMinRed1 = 15;
-+			iMaxRed1 = 15;
- 		}
- 
- 		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-@@ -519,7 +519,7 @@ namespace Etc
- 		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
- 		if (iMaxGreen1 > 15)
- 		{
--			iMinGreen1 = 15;
-+			iMaxGreen1 = 15;
- 		}
- 
- 		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-@@ -530,7 +530,7 @@ namespace Etc
- 		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
- 		if (iMaxBlue1 > 15)
- 		{
--			iMinBlue1 = 15;
-+			iMaxBlue1 = 15;
- 		}
- 
- 		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-@@ -545,7 +545,7 @@ namespace Etc
- 		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
- 		if (iMaxRed2 > 15)
- 		{
--			iMinRed2 = 15;
-+			iMaxRed2 = 15;
- 		}
- 
- 		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-@@ -556,7 +556,7 @@ namespace Etc
- 		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
- 		if (iMaxGreen2 > 15)
- 		{
--			iMinGreen2 = 15;
-+			iMaxGreen2 = 15;
- 		}
- 
- 		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-@@ -567,7 +567,7 @@ namespace Etc
- 		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
- 		if (iMaxBlue2 > 15)
- 		{
--			iMinBlue2 = 15;
-+			iMaxBlue2 = 15;
- 		}
- 
- 		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-@@ -761,7 +761,7 @@ namespace Etc
- 		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
- 		if (iMaxRed1 > 15)
- 		{
--			iMinRed1 = 15;
-+			iMaxRed1 = 15;
- 		}
- 
- 		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-@@ -772,7 +772,7 @@ namespace Etc
- 		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
- 		if (iMaxGreen1 > 15)
- 		{
--			iMinGreen1 = 15;
-+			iMaxGreen1 = 15;
- 		}
- 
- 		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-@@ -783,7 +783,7 @@ namespace Etc
- 		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
- 		if (iMaxBlue1 > 15)
- 		{
--			iMinBlue1 = 15;
-+			iMaxBlue1 = 15;
- 		}
- 
- 		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-@@ -798,7 +798,7 @@ namespace Etc
- 		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
- 		if (iMaxRed2 > 15)
- 		{
--			iMinRed2 = 15;
-+			iMaxRed2 = 15;
- 		}
- 
- 		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-@@ -809,7 +809,7 @@ namespace Etc
- 		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
- 		if (iMaxGreen2 > 15)
- 		{
--			iMinGreen2 = 15;
-+			iMaxGreen2 = 15;
- 		}
- 
- 		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-@@ -820,7 +820,7 @@ namespace Etc
- 		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
- 		if (iMaxBlue2 > 15)
- 		{
--			iMinBlue2 = 15;
-+			iMaxBlue2 = 15;
- 		}
- 
- 		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
-index ba2b42fb05..b94b64e68c 100644
---- a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
-+++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp
-@@ -847,7 +847,7 @@ namespace Etc
- 		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
- 		if (iMaxRed1 > 15)
- 		{
--			iMinRed1 = 15;
-+			iMaxRed1 = 15;
- 		}
- 
- 		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-@@ -858,7 +858,7 @@ namespace Etc
- 		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
- 		if (iMaxGreen1 > 15)
- 		{
--			iMinGreen1 = 15;
-+			iMaxGreen1 = 15;
- 		}
- 
- 		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-@@ -869,7 +869,7 @@ namespace Etc
- 		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
- 		if (iMaxBlue1 > 15)
- 		{
--			iMinBlue1 = 15;
-+			iMaxBlue1 = 15;
- 		}
- 
- 		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-@@ -884,7 +884,7 @@ namespace Etc
- 		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
- 		if (iMaxRed2 > 15)
- 		{
--			iMinRed2 = 15;
-+			iMaxRed2 = 15;
- 		}
- 
- 		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-@@ -895,7 +895,7 @@ namespace Etc
- 		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
- 		if (iMaxGreen2 > 15)
- 		{
--			iMinGreen2 = 15;
-+			iMaxGreen2 = 15;
- 		}
- 
- 		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-@@ -906,7 +906,7 @@ namespace Etc
- 		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
- 		if (iMaxBlue2 > 15)
- 		{
--			iMinBlue2 = 15;
-+			iMaxBlue2 = 15;
- 		}
- 
- 		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
-@@ -1108,7 +1108,7 @@ namespace Etc
- 		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
- 		if (iMaxRed1 > 15)
- 		{
--			iMinRed1 = 15;
-+			iMaxRed1 = 15;
- 		}
- 
- 		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
-@@ -1119,7 +1119,7 @@ namespace Etc
- 		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
- 		if (iMaxGreen1 > 15)
- 		{
--			iMinGreen1 = 15;
-+			iMaxGreen1 = 15;
- 		}
- 
- 		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
-@@ -1130,7 +1130,7 @@ namespace Etc
- 		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
- 		if (iMaxBlue1 > 15)
- 		{
--			iMinBlue1 = 15;
-+			iMaxBlue1 = 15;
- 		}
- 
- 		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
-@@ -1145,7 +1145,7 @@ namespace Etc
- 		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
- 		if (iMaxRed2 > 15)
- 		{
--			iMinRed2 = 15;
-+			iMaxRed2 = 15;
- 		}
- 
- 		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
-@@ -1156,7 +1156,7 @@ namespace Etc
- 		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
- 		if (iMaxGreen2 > 15)
- 		{
--			iMinGreen2 = 15;
-+			iMaxGreen2 = 15;
- 		}
- 
- 		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
-@@ -1167,7 +1167,7 @@ namespace Etc
- 		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
- 		if (iMaxBlue2 > 15)
- 		{
--			iMinBlue2 = 15;
-+			iMaxBlue2 = 15;
- 		}
- 
- 		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
diff --git a/thirdparty/etcpak/AUTHORS.txt b/thirdparty/etcpak/AUTHORS.txt
new file mode 100644
index 0000000000..e7bae62c85
--- /dev/null
+++ b/thirdparty/etcpak/AUTHORS.txt
@@ -0,0 +1,3 @@
+Bartosz Taudul <wolf@nereid.pl>
+Daniel Jungmann <el.3d.source@gmail.com>
+Florian Penzkofer <fp@nullptr.de>
diff --git a/thirdparty/etcpak/Dither.cpp b/thirdparty/etcpak/Dither.cpp
new file mode 100644
index 0000000000..355686f26b
--- /dev/null
+++ b/thirdparty/etcpak/Dither.cpp
@@ -0,0 +1,120 @@
+#include <algorithm>
+#include <string.h>
+
+#include "Dither.hpp"
+#include "Math.hpp"
+#ifdef __SSE4_1__
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#    include <Windows.h>
+#  else
+#    include <x86intrin.h>
+#  endif
+#endif
+
+#ifdef __AVX2__
+void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 ) // Dithers the four 16-byte inputs and writes the 64-byte result to data.
+{
+    static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 }; // amounts added to bytes 0 and 2 of each 4-byte group
+    static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 }; // amounts added to byte 1 of each 4-byte group
+    static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 }; // amounts subtracted from bytes 0 and 2
+    static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 }; // amounts subtracted from byte 1
+
+    const __m256i BayerAdd0 = _mm256_setr_epi8( // table entries 0-7; byte 3 of every group is 0, so that byte passes through unchanged
+        a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
+        a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
+    );
+    const __m256i BayerAdd1 = _mm256_setr_epi8( // table entries 8-15
+        a31[8],  a63[8],  a31[8],  0, a31[9],  a63[9],  a31[9],  0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
+        a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
+    );
+    const __m256i BayerSub0 = _mm256_setr_epi8( // table entries 0-7
+        s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
+        s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
+    );
+    const __m256i BayerSub1 = _mm256_setr_epi8( // table entries 8-15
+        s31[8],  s63[8],  s31[8],  0, s31[9],  s63[9],  s31[9],  0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
+        s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
+    );
+
+    __m256i l0 = _mm256_inserti128_si256( _mm256_castsi128_si256( px0 ), px1, 1 ); // px0 in the low 128 bits, px1 in the high 128 bits
+    __m256i l1 = _mm256_inserti128_si256( _mm256_castsi128_si256( px2 ), px3, 1 ); // px2 in the low 128 bits, px3 in the high 128 bits
+
+    __m256i a0 = _mm256_adds_epu8( l0, BayerAdd0 ); // unsigned saturating add: bytes cap at 255
+    __m256i a1 = _mm256_adds_epu8( l1, BayerAdd1 );
+    __m256i s0 = _mm256_subs_epu8( a0, BayerSub0 ); // unsigned saturating subtract: bytes floor at 0
+    __m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );
+
+    _mm256_storeu_si256( (__m256i*)(data   ), s0 ); // unaligned stores: data needs no particular alignment
+    _mm256_storeu_si256( (__m256i*)(data+32), s1 );
+
+}
+#endif
+
+void Dither( uint8_t* data ) // Dithers, in place, the 16 consecutive 4-byte pixels (64 bytes) starting at data.
+{
+#ifdef __AVX2__
+    static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 }; // amounts added to bytes 0 and 2 of each pixel
+    static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 }; // amounts added to byte 1 of each pixel
+    static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 }; // amounts subtracted from bytes 0 and 2
+    static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 }; // amounts subtracted from byte 1
+
+    const __m256i BayerAdd0 = _mm256_setr_epi8( // pixels 0-7; byte 3 of every pixel gets 0 and is therefore left unchanged
+        a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
+        a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
+    );
+    const __m256i BayerAdd1 = _mm256_setr_epi8( // pixels 8-15
+        a31[8],  a63[8],  a31[8],  0, a31[9],  a63[9],  a31[9],  0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
+        a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
+    );
+    const __m256i BayerSub0 = _mm256_setr_epi8( // pixels 0-7
+        s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
+        s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
+    );
+    const __m256i BayerSub1 = _mm256_setr_epi8( // pixels 8-15
+        s31[8],  s63[8],  s31[8],  0, s31[9],  s63[9],  s31[9],  0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
+        s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
+    );
+
+    __m256i px0 = _mm256_loadu_si256( (__m256i*)(data   ) ); // pixels 0-7 (unaligned load)
+    __m256i px1 = _mm256_loadu_si256( (__m256i*)(data+32) ); // pixels 8-15
+
+    __m256i a0 = _mm256_adds_epu8( px0, BayerAdd0 ); // unsigned saturating add: bytes cap at 255
+    __m256i a1 = _mm256_adds_epu8( px1, BayerAdd1 );
+    __m256i s0 = _mm256_subs_epu8( a0, BayerSub0 ); // unsigned saturating subtract: bytes floor at 0
+    __m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );
+
+    _mm256_storeu_si256( (__m256i*)(data   ), s0 );
+    _mm256_storeu_si256( (__m256i*)(data+32), s1 );
+#else
+    static constexpr int8_t Bayer31[16] = { // signed per-pixel offsets applied to bytes 0 and 2
+        ( 0-8)*2/3, ( 8-8)*2/3, ( 2-8)*2/3, (10-8)*2/3,
+        (12-8)*2/3, ( 4-8)*2/3, (14-8)*2/3, ( 6-8)*2/3,
+        ( 3-8)*2/3, (11-8)*2/3, ( 1-8)*2/3, ( 9-8)*2/3,
+        (15-8)*2/3, ( 7-8)*2/3, (13-8)*2/3, ( 5-8)*2/3
+    };
+    static constexpr int8_t Bayer63[16] = { // signed per-pixel offsets applied to byte 1 (half the amplitude of Bayer31)
+        ( 0-8)*2/6, ( 8-8)*2/6, ( 2-8)*2/6, (10-8)*2/6,
+        (12-8)*2/6, ( 4-8)*2/6, (14-8)*2/6, ( 6-8)*2/6,
+        ( 3-8)*2/6, (11-8)*2/6, ( 1-8)*2/6, ( 9-8)*2/6,
+        (15-8)*2/6, ( 7-8)*2/6, (13-8)*2/6, ( 5-8)*2/6
+    };
+
+    for( int i=0; i<16; i++ )
+    {
+        uint32_t col;
+        memcpy( &col, data, 4 ); // memcpy avoids an unaligned or type-punned 32-bit access
+        uint8_t r = col & 0xFF;
+        uint8_t g = ( col >> 8 ) & 0xFF;
+        uint8_t b = ( col >> 16 ) & 0xFF;
+
+        r = clampu8( r + Bayer31[i] ); // offset may be negative, so clamp the sum back into 0..255
+        g = clampu8( g + Bayer63[i] );
+        b = clampu8( b + Bayer31[i] );
+
+        col = ( col & 0xFF000000 ) | r | ( g << 8 ) | ( b << 16 ); // keep byte 3 as-is, matching the AVX2 path whose tables hold 0 for it
+        memcpy( data, &col, 4 );
+        data += 4;
+    }
+#endif
+}
diff --git a/thirdparty/etcpak/Dither.hpp b/thirdparty/etcpak/Dither.hpp
new file mode 100644
index 0000000000..e43ce5676d
--- /dev/null
+++ b/thirdparty/etcpak/Dither.hpp
@@ -0,0 +1,21 @@
+#ifndef __DITHER_HPP__
+#define __DITHER_HPP__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __AVX2__
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#  else
+#    include <x86intrin.h>
+#  endif
+#endif
+
+void Dither( uint8_t* data );
+
+#ifdef __AVX2__
+void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 );
+#endif
+
+#endif
diff --git a/thirdparty/etcpak/ForceInline.hpp b/thirdparty/etcpak/ForceInline.hpp
new file mode 100644
index 0000000000..b6f012841b
--- /dev/null
+++ b/thirdparty/etcpak/ForceInline.hpp
@@ -0,0 +1,20 @@
+#ifndef __FORCEINLINE_HPP__
+#define __FORCEINLINE_HPP__
+
+#if defined(__GNUC__) // etcpak_force_inline: request inlining using the compiler-specific spelling
+#  define etcpak_force_inline __attribute__((always_inline)) inline
+#elif defined(_MSC_VER)
+#  define etcpak_force_inline __forceinline
+#else // any other compiler: fall back to the plain inline keyword
+#  define etcpak_force_inline inline
+#endif
+
+#if defined(__GNUC__) // etcpak_no_inline: ask the compiler not to inline the annotated function
+#  define etcpak_no_inline __attribute__((noinline))
+#elif defined(_MSC_VER)
+#  define etcpak_no_inline __declspec(noinline)
+#else // any other compiler: the macro expands to nothing
+#  define etcpak_no_inline
+#endif
+
+#endif
diff --git a/thirdparty/etcpak/LICENSE.txt b/thirdparty/etcpak/LICENSE.txt
new file mode 100644
index 0000000000..59e85d6ea5
--- /dev/null
+++ b/thirdparty/etcpak/LICENSE.txt
@@ -0,0 +1,26 @@
+etcpak, an extremely fast ETC compression utility (https://github.com/wolfpld/etcpak)
+
+Copyright (c) 2013-2021, Bartosz Taudul <wolf@nereid.pl>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the <organization> nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/thirdparty/etcpak/Math.hpp b/thirdparty/etcpak/Math.hpp
new file mode 100644
index 0000000000..994e1ac4ea
--- /dev/null
+++ b/thirdparty/etcpak/Math.hpp
@@ -0,0 +1,92 @@
+#ifndef __DARKRL__MATH_HPP__
+#define __DARKRL__MATH_HPP__
+
+#include <algorithm>
+#include <cmath>
+#include <stdint.h>
+
+#include "ForceInline.hpp"
+
+template<typename T>
+static etcpak_force_inline T AlignPOT( T val )
+{
+    // Smallest power of two that is >= val for positive val; zero maps to 1.
+    if( val == 0 ) return 1;
+    T smear = val - 1;
+    // Copy the highest set bit into every lower position with doubling shifts.
+    unsigned int step = 1;
+    while( step < sizeof( T ) * 8 ) { smear |= smear >> step; step <<= 1; }
+    return smear + 1;
+}
+
+static etcpak_force_inline int CountSetBits( uint32_t val ) // Returns the number of 1 bits in val (0..32).
+{
+    val -= ( val >> 1 ) & 0x55555555; // every 2-bit field now holds the count of its own two bits
+    val = ( ( val >> 2 ) & 0x33333333 ) + ( val & 0x33333333 ); // every 4-bit field holds a count of 0..4
+    val = ( ( val >> 4 ) + val ) & 0x0f0f0f0f; // every byte holds the count of its own eight bits
+    val += val >> 8; // fold the per-byte counts towards the low byte
+    val += val >> 16;
+    return val & 0x0000003f; // the total fits in six bits; higher bits hold leftover partial sums
+}
+
+static etcpak_force_inline int CountLeadingZeros( uint32_t val )
+{
+    // Spread the highest set bit into every lower position (shifts of 1, 2,
+    // 4, 8 and 16), then subtract the resulting population count from 32.
+    // A zero input therefore yields 32.
+    for( unsigned int shift = 1; shift <= 16; shift <<= 1 )
+        val |= val >> shift;
+    return 32 - CountSetBits( val );
+}
+
+static etcpak_force_inline float sRGB2linear( float v )
+{
+    // Piecewise sRGB-to-linear transfer curve: inputs at or below the
+    // 0.04045 knee take the linear segment (divide by 12.92), larger inputs
+    // take the 2.4 power segment.
+    const float offset = 0.055f;
+    const float knee = 0.04045f;
+    if( v <= knee ) return v / 12.92f;
+
+    const float base = ( v + offset ) / ( 1 + offset );
+    return pow( base, 2.4f );
+}
+
+static etcpak_force_inline float linear2sRGB( float v )
+{
+    // Piecewise linear-to-sRGB transfer curve: inputs at or below the knee
+    // are scaled by 12.92, larger inputs take the 1/2.4 power segment.
+    const float offset = 0.055f;
+    // Boundary between the linear segment and the power segment.
+    const float knee = 0.0031308f;
+    if( v <= knee ) return 12.92f * v;
+
+    return ( 1 + offset ) * pow( v, 1/2.4f ) - offset;
+}
+
+template<class T>
+static etcpak_force_inline T SmoothStep( T x ) // Evaluates the cubic 3x^2 - 2x^3; x is not clamped here.
+{
+    return x*x*(3-2*x);
+}
+
+static etcpak_force_inline uint8_t clampu8( int32_t val ) // Clamps val to the range 0..255.
+{
+    if( ( val & ~0xFF ) == 0 ) return val; // no bits set outside the low byte: already in range
+    return ( ( ~val ) >> 31 ) & 0xFF; // negative -> 0, above 255 -> 255 (relies on an arithmetic right shift of negative values)
+}
+
+template<class T>
+static etcpak_force_inline T sq( T val ) // Returns val multiplied by itself, converted back to T.
+{
+    return val * val;
+}
+
+static etcpak_force_inline int mul8bit( int a, int b ) // Approximates a*b/255 with rounding, using shifts only; presumably a and b are 0..255 - TODO confirm.
+{
+    int t = a*b + 128; // +128 supplies the rounding bias
+    return ( t + ( t >> 8 ) ) >> 8; // (t + t/256) / 256 stands in for t / 255
+}
+
+#endif
diff --git a/thirdparty/etcpak/ProcessCommon.hpp b/thirdparty/etcpak/ProcessCommon.hpp
new file mode 100644
index 0000000000..657d68888f
--- /dev/null
+++ b/thirdparty/etcpak/ProcessCommon.hpp
@@ -0,0 +1,50 @@
+#ifndef __PROCESSCOMMON_HPP__
+#define __PROCESSCOMMON_HPP__
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+template<class T>
+static size_t GetLeastError( const T* err, size_t num )
+{
+    // Index of the smallest of err[0..num); ties keep the earliest, num of 0 or 1 gives 0.
+    size_t best = 0;
+    size_t cur = 1;
+    while( cur < num )
+    {
+        if( err[cur] < err[best] ) best = cur;
+        cur++;
+    }
+    return best;
+}
+
+static uint64_t FixByteOrder( uint64_t d )
+{
+    // The low 32 bits are kept as they are; the four bytes of the high 32 bits are reversed.
+    const uint64_t low = d & 0x00000000FFFFFFFF;
+    const uint64_t outer = ( ( d & 0xFF00000000000000 ) >> 24 ) | ( ( d & 0x000000FF00000000 ) << 24 );
+    const uint64_t inner = ( ( d & 0x00FF000000000000 ) >> 8 ) | ( ( d & 0x0000FF0000000000 ) << 8 );
+    return low | outer | inner;
+}
+
+template<class T, class S>
+static uint64_t EncodeSelectors( uint64_t d, const T terr[2][8], const S tsel[16][8], const uint32_t* id )
+{
+    // For each of the two error rows pick the column with the lowest error
+    // and store those two indices (0..7) at bit offsets 26 and 29 of d.
+    const size_t first = GetLeastError( terr[0], 8 );
+    const size_t second = GetLeastError( terr[1], 8 );
+    d |= ( first << 26 ) | ( second << 29 );
+
+    // For each of the 16 entries take the selector from the column chosen by
+    // the parity of id[px]; its bit 0 lands at bit 32+px, its bit 1 at 48+px.
+    for( int px=0; px<16; px++ )
+    {
+        const uint64_t sel = tsel[px][( id[px] % 2 ) ? second : first];
+        d |= ( ( sel & 0x1 ) << ( px + 32 ) ) | ( ( sel & 0x2 ) << ( px + 47 ) );
+    }
+    return d;
+}
+
+#endif
diff --git a/thirdparty/etcpak/ProcessDxtc.cpp b/thirdparty/etcpak/ProcessDxtc.cpp
new file mode 100644
index 0000000000..508d55fd75
--- /dev/null
+++ b/thirdparty/etcpak/ProcessDxtc.cpp
@@ -0,0 +1,956 @@
+#include "Dither.hpp"
+#include "ForceInline.hpp"
+#include "ProcessDxtc.hpp"
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __ARM_NEON
+#  include <arm_neon.h>
+#endif
+
+#if defined __AVX__ && !defined __SSE4_1__
+#  define __SSE4_1__
+#endif
+
+#if defined __SSE4_1__ || defined __AVX2__
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#  else
+#    include <x86intrin.h>
+#    ifndef _mm256_cvtsi256_si32
+#      define _mm256_cvtsi256_si32( v ) ( _mm_cvtsi128_si32( _mm256_castsi256_si128( v ) ) )
+#    endif
+#  endif
+#endif
+
+
+static etcpak_force_inline uint16_t to565( uint8_t r, uint8_t g, uint8_t b ) // Packs r into bits 11-15, g into bits 5-10 and b into bits 0-4, keeping each channel's top bits.
+{
+    return ( ( r >> 3 ) << 11 ) | ( ( g >> 2 ) << 5 ) | ( b >> 3 );
+}
+
+// Packs a 32-bit pixel into 16 bits: the top 5 bits of byte 0 go to bits 11-15, the top
+// 6 bits of byte 1 to bits 5-10 and the top 5 bits of byte 2 to bits 0-4; byte 3 is ignored.
+static etcpak_force_inline uint16_t to565( uint32_t c )
+{
+    const uint32_t hi = ( c << 8 ) & 0xF800, mid = ( c >> 5 ) & 0x07E0, lo = ( c >> 19 ) & 0x001F;
+    return hi | mid | lo;
+}
+
+static const uint8_t DxtcIndexTable[256] = {
+    85,     87,     86,     84,     93,     95,     94,     92,     89,     91,     90,     88,     81,     83,     82,     80,
+    117,    119,    118,    116,    125,    127,    126,    124,    121,    123,    122,    120,    113,    115,    114,    112,
+    101,    103,    102,    100,    109,    111,    110,    108,    105,    107,    106,    104,    97,     99,     98,     96,
+    69,     71,     70,     68,     77,     79,     78,     76,     73,     75,     74,     72,     65,     67,     66,     64,
+    213,    215,    214,    212,    221,    223,    222,    220,    217,    219,    218,    216,    209,    211,    210,    208,
+    245,    247,    246,    244,    253,    255,    254,    252,    249,    251,    250,    248,    241,    243,    242,    240,
+    229,    231,    230,    228,    237,    239,    238,    236,    233,    235,    234,    232,    225,    227,    226,    224,
+    197,    199,    198,    196,    205,    207,    206,    204,    201,    203,    202,    200,    193,    195,    194,    192,
+    149,    151,    150,    148,    157,    159,    158,    156,    153,    155,    154,    152,    145,    147,    146,    144,
+    181,    183,    182,    180,    189,    191,    190,    188,    185,    187,    186,    184,    177,    179,    178,    176,
+    165,    167,    166,    164,    173,    175,    174,    172,    169,    171,    170,    168,    161,    163,    162,    160,
+    133,    135,    134,    132,    141,    143,    142,    140,    137,    139,    138,    136,    129,    131,    130,    128,
+    21,     23,     22,     20,     29,     31,     30,     28,     25,     27,     26,     24,     17,     19,     18,     16,
+    53,     55,     54,     52,     61,     63,     62,     60,     57,     59,     58,     56,     49,     51,     50,     48,
+    37,     39,     38,     36,     45,     47,     46,     44,     41,     43,     42,     40,     33,     35,     34,     32,
+    5,      7,      6,      4,      13,     15,     14,     12,     9,      11,     10,     8,      1,      3,      2,      0
+};
+
+static const uint8_t AlphaIndexTable_SSE[64] = {
+    9,      15,     14,     13,     12,     11,     10,     8,      57,     63,     62,     61,     60,     59,     58,     56,
+    49,     55,     54,     53,     52,     51,     50,     48,     41,     47,     46,     45,     44,     43,     42,     40,
+    33,     39,     38,     37,     36,     35,     34,     32,     25,     31,     30,     29,     28,     27,     26,     24,
+    17,     23,     22,     21,     20,     19,     18,     16,     1,      7,      6,      5,      4,      3,      2,      0,
+};
+
+static const uint16_t DivTable[255*3+1] = {
+    0xffff, 0xffff, 0xffff, 0xffff, 0xcccc, 0xaaaa, 0x9249, 0x8000, 0x71c7, 0x6666, 0x5d17, 0x5555, 0x4ec4, 0x4924, 0x4444, 0x4000,
+    0x3c3c, 0x38e3, 0x35e5, 0x3333, 0x30c3, 0x2e8b, 0x2c85, 0x2aaa, 0x28f5, 0x2762, 0x25ed, 0x2492, 0x234f, 0x2222, 0x2108, 0x2000,
+    0x1f07, 0x1e1e, 0x1d41, 0x1c71, 0x1bac, 0x1af2, 0x1a41, 0x1999, 0x18f9, 0x1861, 0x17d0, 0x1745, 0x16c1, 0x1642, 0x15c9, 0x1555,
+    0x14e5, 0x147a, 0x1414, 0x13b1, 0x1352, 0x12f6, 0x129e, 0x1249, 0x11f7, 0x11a7, 0x115b, 0x1111, 0x10c9, 0x1084, 0x1041, 0x1000,
+    0x0fc0, 0x0f83, 0x0f48, 0x0f0f, 0x0ed7, 0x0ea0, 0x0e6c, 0x0e38, 0x0e07, 0x0dd6, 0x0da7, 0x0d79, 0x0d4c, 0x0d20, 0x0cf6, 0x0ccc,
+    0x0ca4, 0x0c7c, 0x0c56, 0x0c30, 0x0c0c, 0x0be8, 0x0bc5, 0x0ba2, 0x0b81, 0x0b60, 0x0b40, 0x0b21, 0x0b02, 0x0ae4, 0x0ac7, 0x0aaa,
+    0x0a8e, 0x0a72, 0x0a57, 0x0a3d, 0x0a23, 0x0a0a, 0x09f1, 0x09d8, 0x09c0, 0x09a9, 0x0991, 0x097b, 0x0964, 0x094f, 0x0939, 0x0924,
+    0x090f, 0x08fb, 0x08e7, 0x08d3, 0x08c0, 0x08ad, 0x089a, 0x0888, 0x0876, 0x0864, 0x0853, 0x0842, 0x0831, 0x0820, 0x0810, 0x0800,
+    0x07f0, 0x07e0, 0x07d1, 0x07c1, 0x07b3, 0x07a4, 0x0795, 0x0787, 0x0779, 0x076b, 0x075d, 0x0750, 0x0743, 0x0736, 0x0729, 0x071c,
+    0x070f, 0x0703, 0x06f7, 0x06eb, 0x06df, 0x06d3, 0x06c8, 0x06bc, 0x06b1, 0x06a6, 0x069b, 0x0690, 0x0685, 0x067b, 0x0670, 0x0666,
+    0x065c, 0x0652, 0x0648, 0x063e, 0x0634, 0x062b, 0x0621, 0x0618, 0x060f, 0x0606, 0x05fd, 0x05f4, 0x05eb, 0x05e2, 0x05d9, 0x05d1,
+    0x05c9, 0x05c0, 0x05b8, 0x05b0, 0x05a8, 0x05a0, 0x0598, 0x0590, 0x0588, 0x0581, 0x0579, 0x0572, 0x056b, 0x0563, 0x055c, 0x0555,
+    0x054e, 0x0547, 0x0540, 0x0539, 0x0532, 0x052b, 0x0525, 0x051e, 0x0518, 0x0511, 0x050b, 0x0505, 0x04fe, 0x04f8, 0x04f2, 0x04ec,
+    0x04e6, 0x04e0, 0x04da, 0x04d4, 0x04ce, 0x04c8, 0x04c3, 0x04bd, 0x04b8, 0x04b2, 0x04ad, 0x04a7, 0x04a2, 0x049c, 0x0497, 0x0492,
+    0x048d, 0x0487, 0x0482, 0x047d, 0x0478, 0x0473, 0x046e, 0x0469, 0x0465, 0x0460, 0x045b, 0x0456, 0x0452, 0x044d, 0x0448, 0x0444,
+    0x043f, 0x043b, 0x0436, 0x0432, 0x042d, 0x0429, 0x0425, 0x0421, 0x041c, 0x0418, 0x0414, 0x0410, 0x040c, 0x0408, 0x0404, 0x0400,
+    0x03fc, 0x03f8, 0x03f4, 0x03f0, 0x03ec, 0x03e8, 0x03e4, 0x03e0, 0x03dd, 0x03d9, 0x03d5, 0x03d2, 0x03ce, 0x03ca, 0x03c7, 0x03c3,
+    0x03c0, 0x03bc, 0x03b9, 0x03b5, 0x03b2, 0x03ae, 0x03ab, 0x03a8, 0x03a4, 0x03a1, 0x039e, 0x039b, 0x0397, 0x0394, 0x0391, 0x038e,
+    0x038b, 0x0387, 0x0384, 0x0381, 0x037e, 0x037b, 0x0378, 0x0375, 0x0372, 0x036f, 0x036c, 0x0369, 0x0366, 0x0364, 0x0361, 0x035e,
+    0x035b, 0x0358, 0x0355, 0x0353, 0x0350, 0x034d, 0x034a, 0x0348, 0x0345, 0x0342, 0x0340, 0x033d, 0x033a, 0x0338, 0x0335, 0x0333,
+    0x0330, 0x032e, 0x032b, 0x0329, 0x0326, 0x0324, 0x0321, 0x031f, 0x031c, 0x031a, 0x0317, 0x0315, 0x0313, 0x0310, 0x030e, 0x030c,
+    0x0309, 0x0307, 0x0305, 0x0303, 0x0300, 0x02fe, 0x02fc, 0x02fa, 0x02f7, 0x02f5, 0x02f3, 0x02f1, 0x02ef, 0x02ec, 0x02ea, 0x02e8,
+    0x02e6, 0x02e4, 0x02e2, 0x02e0, 0x02de, 0x02dc, 0x02da, 0x02d8, 0x02d6, 0x02d4, 0x02d2, 0x02d0, 0x02ce, 0x02cc, 0x02ca, 0x02c8,
+    0x02c6, 0x02c4, 0x02c2, 0x02c0, 0x02be, 0x02bc, 0x02bb, 0x02b9, 0x02b7, 0x02b5, 0x02b3, 0x02b1, 0x02b0, 0x02ae, 0x02ac, 0x02aa,
+    0x02a8, 0x02a7, 0x02a5, 0x02a3, 0x02a1, 0x02a0, 0x029e, 0x029c, 0x029b, 0x0299, 0x0297, 0x0295, 0x0294, 0x0292, 0x0291, 0x028f,
+    0x028d, 0x028c, 0x028a, 0x0288, 0x0287, 0x0285, 0x0284, 0x0282, 0x0280, 0x027f, 0x027d, 0x027c, 0x027a, 0x0279, 0x0277, 0x0276,
+    0x0274, 0x0273, 0x0271, 0x0270, 0x026e, 0x026d, 0x026b, 0x026a, 0x0268, 0x0267, 0x0265, 0x0264, 0x0263, 0x0261, 0x0260, 0x025e,
+    0x025d, 0x025c, 0x025a, 0x0259, 0x0257, 0x0256, 0x0255, 0x0253, 0x0252, 0x0251, 0x024f, 0x024e, 0x024d, 0x024b, 0x024a, 0x0249,
+    0x0247, 0x0246, 0x0245, 0x0243, 0x0242, 0x0241, 0x0240, 0x023e, 0x023d, 0x023c, 0x023b, 0x0239, 0x0238, 0x0237, 0x0236, 0x0234,
+    0x0233, 0x0232, 0x0231, 0x0230, 0x022e, 0x022d, 0x022c, 0x022b, 0x022a, 0x0229, 0x0227, 0x0226, 0x0225, 0x0224, 0x0223, 0x0222,
+    0x0220, 0x021f, 0x021e, 0x021d, 0x021c, 0x021b, 0x021a, 0x0219, 0x0218, 0x0216, 0x0215, 0x0214, 0x0213, 0x0212, 0x0211, 0x0210,
+    0x020f, 0x020e, 0x020d, 0x020c, 0x020b, 0x020a, 0x0209, 0x0208, 0x0207, 0x0206, 0x0205, 0x0204, 0x0203, 0x0202, 0x0201, 0x0200,
+    0x01ff, 0x01fe, 0x01fd, 0x01fc, 0x01fb, 0x01fa, 0x01f9, 0x01f8, 0x01f7, 0x01f6, 0x01f5, 0x01f4, 0x01f3, 0x01f2, 0x01f1, 0x01f0,
+    0x01ef, 0x01ee, 0x01ed, 0x01ec, 0x01eb, 0x01ea, 0x01e9, 0x01e9, 0x01e8, 0x01e7, 0x01e6, 0x01e5, 0x01e4, 0x01e3, 0x01e2, 0x01e1,
+    0x01e0, 0x01e0, 0x01df, 0x01de, 0x01dd, 0x01dc, 0x01db, 0x01da, 0x01da, 0x01d9, 0x01d8, 0x01d7, 0x01d6, 0x01d5, 0x01d4, 0x01d4,
+    0x01d3, 0x01d2, 0x01d1, 0x01d0, 0x01cf, 0x01cf, 0x01ce, 0x01cd, 0x01cc, 0x01cb, 0x01cb, 0x01ca, 0x01c9, 0x01c8, 0x01c7, 0x01c7,
+    0x01c6, 0x01c5, 0x01c4, 0x01c3, 0x01c3, 0x01c2, 0x01c1, 0x01c0, 0x01c0, 0x01bf, 0x01be, 0x01bd, 0x01bd, 0x01bc, 0x01bb, 0x01ba,
+    0x01ba, 0x01b9, 0x01b8, 0x01b7, 0x01b7, 0x01b6, 0x01b5, 0x01b4, 0x01b4, 0x01b3, 0x01b2, 0x01b2, 0x01b1, 0x01b0, 0x01af, 0x01af,
+    0x01ae, 0x01ad, 0x01ad, 0x01ac, 0x01ab, 0x01aa, 0x01aa, 0x01a9, 0x01a8, 0x01a8, 0x01a7, 0x01a6, 0x01a6, 0x01a5, 0x01a4, 0x01a4,
+    0x01a3, 0x01a2, 0x01a2, 0x01a1, 0x01a0, 0x01a0, 0x019f, 0x019e, 0x019e, 0x019d, 0x019c, 0x019c, 0x019b, 0x019a, 0x019a, 0x0199,
+    0x0198, 0x0198, 0x0197, 0x0197, 0x0196, 0x0195, 0x0195, 0x0194, 0x0193, 0x0193, 0x0192, 0x0192, 0x0191, 0x0190, 0x0190, 0x018f,
+    0x018f, 0x018e, 0x018d, 0x018d, 0x018c, 0x018b, 0x018b, 0x018a, 0x018a, 0x0189, 0x0189, 0x0188, 0x0187, 0x0187, 0x0186, 0x0186,
+    0x0185, 0x0184, 0x0184, 0x0183, 0x0183, 0x0182, 0x0182, 0x0181, 0x0180, 0x0180, 0x017f, 0x017f, 0x017e, 0x017e, 0x017d, 0x017d,
+    0x017c, 0x017b, 0x017b, 0x017a, 0x017a, 0x0179, 0x0179, 0x0178, 0x0178, 0x0177, 0x0177, 0x0176, 0x0175, 0x0175, 0x0174, 0x0174,
+    0x0173, 0x0173, 0x0172, 0x0172, 0x0171, 0x0171, 0x0170, 0x0170, 0x016f, 0x016f, 0x016e, 0x016e, 0x016d, 0x016d, 0x016c, 0x016c,
+    0x016b, 0x016b, 0x016a, 0x016a, 0x0169, 0x0169, 0x0168, 0x0168, 0x0167, 0x0167, 0x0166, 0x0166, 0x0165, 0x0165, 0x0164, 0x0164,
+    0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c,
+    0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156
+};
+static const uint16_t DivTableNEON[255*3+1] = {
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x1c71, 0x1af2, 0x1999, 0x1861, 0x1745, 0x1642, 0x1555, 0x147a, 0x13b1, 0x12f6, 0x1249, 0x11a7, 0x1111, 0x1084, 0x1000,
+    0x0f83, 0x0f0f, 0x0ea0, 0x0e38, 0x0dd6, 0x0d79, 0x0d20, 0x0ccc, 0x0c7c, 0x0c30, 0x0be8, 0x0ba2, 0x0b60, 0x0b21, 0x0ae4, 0x0aaa,
+    0x0a72, 0x0a3d, 0x0a0a, 0x09d8, 0x09a9, 0x097b, 0x094f, 0x0924, 0x08fb, 0x08d3, 0x08ad, 0x0888, 0x0864, 0x0842, 0x0820, 0x0800,
+    0x07e0, 0x07c1, 0x07a4, 0x0787, 0x076b, 0x0750, 0x0736, 0x071c, 0x0703, 0x06eb, 0x06d3, 0x06bc, 0x06a6, 0x0690, 0x067b, 0x0666,
+    0x0652, 0x063e, 0x062b, 0x0618, 0x0606, 0x05f4, 0x05e2, 0x05d1, 0x05c0, 0x05b0, 0x05a0, 0x0590, 0x0581, 0x0572, 0x0563, 0x0555,
+    0x0547, 0x0539, 0x052b, 0x051e, 0x0511, 0x0505, 0x04f8, 0x04ec, 0x04e0, 0x04d4, 0x04c8, 0x04bd, 0x04b2, 0x04a7, 0x049c, 0x0492,
+    0x0487, 0x047d, 0x0473, 0x0469, 0x0460, 0x0456, 0x044d, 0x0444, 0x043b, 0x0432, 0x0429, 0x0421, 0x0418, 0x0410, 0x0408, 0x0400,
+    0x03f8, 0x03f0, 0x03e8, 0x03e0, 0x03d9, 0x03d2, 0x03ca, 0x03c3, 0x03bc, 0x03b5, 0x03ae, 0x03a8, 0x03a1, 0x039b, 0x0394, 0x038e,
+    0x0387, 0x0381, 0x037b, 0x0375, 0x036f, 0x0369, 0x0364, 0x035e, 0x0358, 0x0353, 0x034d, 0x0348, 0x0342, 0x033d, 0x0338, 0x0333,
+    0x032e, 0x0329, 0x0324, 0x031f, 0x031a, 0x0315, 0x0310, 0x030c, 0x0307, 0x0303, 0x02fe, 0x02fa, 0x02f5, 0x02f1, 0x02ec, 0x02e8,
+    0x02e4, 0x02e0, 0x02dc, 0x02d8, 0x02d4, 0x02d0, 0x02cc, 0x02c8, 0x02c4, 0x02c0, 0x02bc, 0x02b9, 0x02b5, 0x02b1, 0x02ae, 0x02aa,
+    0x02a7, 0x02a3, 0x02a0, 0x029c, 0x0299, 0x0295, 0x0292, 0x028f, 0x028c, 0x0288, 0x0285, 0x0282, 0x027f, 0x027c, 0x0279, 0x0276,
+    0x0273, 0x0270, 0x026d, 0x026a, 0x0267, 0x0264, 0x0261, 0x025e, 0x025c, 0x0259, 0x0256, 0x0253, 0x0251, 0x024e, 0x024b, 0x0249,
+    0x0246, 0x0243, 0x0241, 0x023e, 0x023c, 0x0239, 0x0237, 0x0234, 0x0232, 0x0230, 0x022d, 0x022b, 0x0229, 0x0226, 0x0224, 0x0222,
+    0x021f, 0x021d, 0x021b, 0x0219, 0x0216, 0x0214, 0x0212, 0x0210, 0x020e, 0x020c, 0x020a, 0x0208, 0x0206, 0x0204, 0x0202, 0x0200,
+    0x01fe, 0x01fc, 0x01fa, 0x01f8, 0x01f6, 0x01f4, 0x01f2, 0x01f0, 0x01ee, 0x01ec, 0x01ea, 0x01e9, 0x01e7, 0x01e5, 0x01e3, 0x01e1,
+    0x01e0, 0x01de, 0x01dc, 0x01da, 0x01d9, 0x01d7, 0x01d5, 0x01d4, 0x01d2, 0x01d0, 0x01cf, 0x01cd, 0x01cb, 0x01ca, 0x01c8, 0x01c7,
+    0x01c5, 0x01c3, 0x01c2, 0x01c0, 0x01bf, 0x01bd, 0x01bc, 0x01ba, 0x01b9, 0x01b7, 0x01b6, 0x01b4, 0x01b3, 0x01b2, 0x01b0, 0x01af,
+    0x01ad, 0x01ac, 0x01aa, 0x01a9, 0x01a8, 0x01a6, 0x01a5, 0x01a4, 0x01a2, 0x01a1, 0x01a0, 0x019e, 0x019d, 0x019c, 0x019a, 0x0199,
+    0x0198, 0x0197, 0x0195, 0x0194, 0x0193, 0x0192, 0x0190, 0x018f, 0x018e, 0x018d, 0x018b, 0x018a, 0x0189, 0x0188, 0x0187, 0x0186,
+    0x0184, 0x0183, 0x0182, 0x0181, 0x0180, 0x017f, 0x017e, 0x017d, 0x017b, 0x017a, 0x0179, 0x0178, 0x0177, 0x0176, 0x0175, 0x0174,
+    0x0173, 0x0172, 0x0171, 0x0170, 0x016f, 0x016e, 0x016d, 0x016c, 0x016b, 0x016a, 0x0169, 0x0168, 0x0167, 0x0166, 0x0165, 0x0164,
+    0x0163, 0x0162, 0x0161, 0x0160, 0x015f, 0x015e, 0x015d, 0x015c, 0x015b, 0x015a, 0x0159, 0x0158, 0x0158, 0x0157, 0x0156, 0x0155,
+    0x0154, 0x0153, 0x0152, 0x0151, 0x0150, 0x0150, 0x014f, 0x014e, 0x014d, 0x014c, 0x014b, 0x014a, 0x014a, 0x0149, 0x0148, 0x0147,
+    0x0146, 0x0146, 0x0145, 0x0144, 0x0143, 0x0142, 0x0142, 0x0141, 0x0140, 0x013f, 0x013e, 0x013e, 0x013d, 0x013c, 0x013b, 0x013b,
+    0x013a, 0x0139, 0x0138, 0x0138, 0x0137, 0x0136, 0x0135, 0x0135, 0x0134, 0x0133, 0x0132, 0x0132, 0x0131, 0x0130, 0x0130, 0x012f,
+    0x012e, 0x012e, 0x012d, 0x012c, 0x012b, 0x012b, 0x012a, 0x0129, 0x0129, 0x0128, 0x0127, 0x0127, 0x0126, 0x0125, 0x0125, 0x0124,
+    0x0123, 0x0123, 0x0122, 0x0121, 0x0121, 0x0120, 0x0120, 0x011f, 0x011e, 0x011e, 0x011d, 0x011c, 0x011c, 0x011b, 0x011b, 0x011a,
+    0x0119, 0x0119, 0x0118, 0x0118, 0x0117, 0x0116, 0x0116, 0x0115, 0x0115, 0x0114, 0x0113, 0x0113, 0x0112, 0x0112, 0x0111, 0x0111,
+    0x0110, 0x010f, 0x010f, 0x010e, 0x010e, 0x010d, 0x010d, 0x010c, 0x010c, 0x010b, 0x010a, 0x010a, 0x0109, 0x0109, 0x0108, 0x0108,
+    0x0107, 0x0107, 0x0106, 0x0106, 0x0105, 0x0105, 0x0104, 0x0104, 0x0103, 0x0103, 0x0102, 0x0102, 0x0101, 0x0101, 0x0100, 0x0100,
+    0x00ff, 0x00ff, 0x00fe, 0x00fe, 0x00fd, 0x00fd, 0x00fc, 0x00fc, 0x00fb, 0x00fb, 0x00fa, 0x00fa, 0x00f9, 0x00f9, 0x00f8, 0x00f8,
+    0x00f7, 0x00f7, 0x00f6, 0x00f6, 0x00f5, 0x00f5, 0x00f4, 0x00f4, 0x00f4, 0x00f3, 0x00f3, 0x00f2, 0x00f2, 0x00f1, 0x00f1, 0x00f0,
+    0x00f0, 0x00f0, 0x00ef, 0x00ef, 0x00ee, 0x00ee, 0x00ed, 0x00ed, 0x00ed, 0x00ec, 0x00ec, 0x00eb, 0x00eb, 0x00ea, 0x00ea, 0x00ea,
+    0x00e9, 0x00e9, 0x00e8, 0x00e8, 0x00e7, 0x00e7, 0x00e7, 0x00e6, 0x00e6, 0x00e5, 0x00e5, 0x00e5, 0x00e4, 0x00e4, 0x00e3, 0x00e3,
+    0x00e3, 0x00e2, 0x00e2, 0x00e1, 0x00e1, 0x00e1, 0x00e0, 0x00e0, 0x00e0, 0x00df, 0x00df, 0x00de, 0x00de, 0x00de, 0x00dd, 0x00dd,
+    0x00dd, 0x00dc, 0x00dc, 0x00db, 0x00db, 0x00db, 0x00da, 0x00da, 0x00da, 0x00d9, 0x00d9, 0x00d9, 0x00d8, 0x00d8, 0x00d7, 0x00d7,
+    0x00d7, 0x00d6, 0x00d6, 0x00d6, 0x00d5, 0x00d5, 0x00d5, 0x00d4, 0x00d4, 0x00d4, 0x00d3, 0x00d3, 0x00d3, 0x00d2, 0x00d2, 0x00d2,
+    0x00d1, 0x00d1, 0x00d1, 0x00d0, 0x00d0, 0x00d0, 0x00cf, 0x00cf, 0x00cf, 0x00ce, 0x00ce, 0x00ce, 0x00cd, 0x00cd, 0x00cd, 0x00cc,
+    0x00cc, 0x00cc, 0x00cb, 0x00cb, 0x00cb, 0x00ca, 0x00ca, 0x00ca, 0x00c9, 0x00c9, 0x00c9, 0x00c9, 0x00c8, 0x00c8, 0x00c8, 0x00c7,
+    0x00c7, 0x00c7, 0x00c6, 0x00c6, 0x00c6, 0x00c5, 0x00c5, 0x00c5, 0x00c5, 0x00c4, 0x00c4, 0x00c4, 0x00c3, 0x00c3, 0x00c3, 0x00c3,
+    0x00c2, 0x00c2, 0x00c2, 0x00c1, 0x00c1, 0x00c1, 0x00c1, 0x00c0, 0x00c0, 0x00c0, 0x00bf, 0x00bf, 0x00bf, 0x00bf, 0x00be, 0x00be,
+    0x00be, 0x00bd, 0x00bd, 0x00bd, 0x00bd, 0x00bc, 0x00bc, 0x00bc, 0x00bc, 0x00bb, 0x00bb, 0x00bb, 0x00ba, 0x00ba, 0x00ba, 0x00ba,
+    0x00b9, 0x00b9, 0x00b9, 0x00b9, 0x00b8, 0x00b8, 0x00b8, 0x00b8, 0x00b7, 0x00b7, 0x00b7, 0x00b7, 0x00b6, 0x00b6, 0x00b6, 0x00b6,
+    0x00b5, 0x00b5, 0x00b5, 0x00b5, 0x00b4, 0x00b4, 0x00b4, 0x00b4, 0x00b3, 0x00b3, 0x00b3, 0x00b3, 0x00b2, 0x00b2, 0x00b2, 0x00b2,
+    0x00b1, 0x00b1, 0x00b1, 0x00b1, 0x00b0, 0x00b0, 0x00b0, 0x00b0, 0x00af, 0x00af, 0x00af, 0x00af, 0x00ae, 0x00ae, 0x00ae, 0x00ae,
+    0x00ae, 0x00ad, 0x00ad, 0x00ad, 0x00ad, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ab, 0x00ab, 0x00ab, 0x00ab,
+};
+
+static const uint16_t DivTableAlpha[256] = {
+    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xe38e, 0xcccc, 0xba2e, 0xaaaa, 0x9d89, 0x9249, 0x8888, 0x8000,
+    0x7878, 0x71c7, 0x6bca, 0x6666, 0x6186, 0x5d17, 0x590b, 0x5555, 0x51eb, 0x4ec4, 0x4bda, 0x4924, 0x469e, 0x4444, 0x4210, 0x4000,
+    0x3e0f, 0x3c3c, 0x3a83, 0x38e3, 0x3759, 0x35e5, 0x3483, 0x3333, 0x31f3, 0x30c3, 0x2fa0, 0x2e8b, 0x2d82, 0x2c85, 0x2b93, 0x2aaa,
+    0x29cb, 0x28f5, 0x2828, 0x2762, 0x26a4, 0x25ed, 0x253c, 0x2492, 0x23ee, 0x234f, 0x22b6, 0x2222, 0x2192, 0x2108, 0x2082, 0x2000,
+    0x1f81, 0x1f07, 0x1e91, 0x1e1e, 0x1dae, 0x1d41, 0x1cd8, 0x1c71, 0x1c0e, 0x1bac, 0x1b4e, 0x1af2, 0x1a98, 0x1a41, 0x19ec, 0x1999,
+    0x1948, 0x18f9, 0x18ac, 0x1861, 0x1818, 0x17d0, 0x178a, 0x1745, 0x1702, 0x16c1, 0x1681, 0x1642, 0x1605, 0x15c9, 0x158e, 0x1555,
+    0x151d, 0x14e5, 0x14af, 0x147a, 0x1446, 0x1414, 0x13e2, 0x13b1, 0x1381, 0x1352, 0x1323, 0x12f6, 0x12c9, 0x129e, 0x1273, 0x1249,
+    0x121f, 0x11f7, 0x11cf, 0x11a7, 0x1181, 0x115b, 0x1135, 0x1111, 0x10ec, 0x10c9, 0x10a6, 0x1084, 0x1062, 0x1041, 0x1020, 0x1000,
+    0x0fe0, 0x0fc0, 0x0fa2, 0x0f83, 0x0f66, 0x0f48, 0x0f2b, 0x0f0f, 0x0ef2, 0x0ed7, 0x0ebb, 0x0ea0, 0x0e86, 0x0e6c, 0x0e52, 0x0e38,
+    0x0e1f, 0x0e07, 0x0dee, 0x0dd6, 0x0dbe, 0x0da7, 0x0d90, 0x0d79, 0x0d62, 0x0d4c, 0x0d36, 0x0d20, 0x0d0b, 0x0cf6, 0x0ce1, 0x0ccc,
+    0x0cb8, 0x0ca4, 0x0c90, 0x0c7c, 0x0c69, 0x0c56, 0x0c43, 0x0c30, 0x0c1e, 0x0c0c, 0x0bfa, 0x0be8, 0x0bd6, 0x0bc5, 0x0bb3, 0x0ba2,
+    0x0b92, 0x0b81, 0x0b70, 0x0b60, 0x0b50, 0x0b40, 0x0b30, 0x0b21, 0x0b11, 0x0b02, 0x0af3, 0x0ae4, 0x0ad6, 0x0ac7, 0x0ab8, 0x0aaa,
+    0x0a9c, 0x0a8e, 0x0a80, 0x0a72, 0x0a65, 0x0a57, 0x0a4a, 0x0a3d, 0x0a30, 0x0a23, 0x0a16, 0x0a0a, 0x09fd, 0x09f1, 0x09e4, 0x09d8,
+    0x09cc, 0x09c0, 0x09b4, 0x09a9, 0x099d, 0x0991, 0x0986, 0x097b, 0x0970, 0x0964, 0x095a, 0x094f, 0x0944, 0x0939, 0x092f, 0x0924,
+    0x091a, 0x090f, 0x0905, 0x08fb, 0x08f1, 0x08e7, 0x08dd, 0x08d3, 0x08ca, 0x08c0, 0x08b7, 0x08ad, 0x08a4, 0x089a, 0x0891, 0x0888,
+    0x087f, 0x0876, 0x086d, 0x0864, 0x085b, 0x0853, 0x084a, 0x0842, 0x0839, 0x0831, 0x0828, 0x0820, 0x0818, 0x0810, 0x0808, 0x0800,
+};
+
+static etcpak_force_inline uint64_t ProcessRGB( const uint8_t* src ) // Encodes the 16 four-byte pixels (64 bytes) at src into one 64-bit block word.
+{ // Result: bits 0-15 = to565 of the inset maximum, bits 16-31 = to565 of the inset minimum, bits 32-63 = per-pixel indices.
+#ifdef __SSE4_1__
+    __m128i px0 = _mm_loadu_si128(((__m128i*)src) + 0); // px0..px3: four unaligned 16-byte loads, four pixels each
+    __m128i px1 = _mm_loadu_si128(((__m128i*)src) + 1);
+    __m128i px2 = _mm_loadu_si128(((__m128i*)src) + 2);
+    __m128i px3 = _mm_loadu_si128(((__m128i*)src) + 3);
+    // Solid-block shortcut: compare every pixel, masked to the bits to565 keeps, against pixel 0.
+    __m128i smask = _mm_set1_epi32( 0xF8FCF8 );
+    __m128i sd0 = _mm_and_si128( px0, smask );
+    __m128i sd1 = _mm_and_si128( px1, smask );
+    __m128i sd2 = _mm_and_si128( px2, smask );
+    __m128i sd3 = _mm_and_si128( px3, smask );
+
+    __m128i sc = _mm_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0)); // masked pixel 0 broadcast to all four lanes
+
+    __m128i sc0 = _mm_cmpeq_epi8(sd0, sc);
+    __m128i sc1 = _mm_cmpeq_epi8(sd1, sc);
+    __m128i sc2 = _mm_cmpeq_epi8(sd2, sc);
+    __m128i sc3 = _mm_cmpeq_epi8(sd3, sc);
+
+    __m128i sm0 = _mm_and_si128(sc0, sc1);
+    __m128i sm1 = _mm_and_si128(sc2, sc3);
+    __m128i sm = _mm_and_si128(sm0, sm1);
+
+    if( _mm_testc_si128(sm, _mm_set1_epi32(-1)) ) // true when every compared byte matched
+    {
+        uint32_t c;
+        memcpy( &c, src, 4 );
+        return uint64_t( to565( c ) ) << 16; // only bits 16-31 are set; all other bits stay 0
+    }
+    // Per-byte minimum and maximum over all 16 pixels, reduced until every 32-bit lane holds the result.
+    __m128i min0 = _mm_min_epu8( px0, px1 ); // NOTE(review): byte 3 is not masked here (the 32-bit NEON branch masks with 0xFFFFFF) - presumably callers pass a uniform byte 3; confirm
+    __m128i min1 = _mm_min_epu8( px2, px3 );
+    __m128i min2 = _mm_min_epu8( min0, min1 );
+
+    __m128i max0 = _mm_max_epu8( px0, px1 );
+    __m128i max1 = _mm_max_epu8( px2, px3 );
+    __m128i max2 = _mm_max_epu8( max0, max1 );
+
+    __m128i min3 = _mm_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i max3 = _mm_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i min4 = _mm_min_epu8( min2, min3 );
+    __m128i max4 = _mm_max_epu8( max2, max3 );
+
+    __m128i min5 = _mm_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i max5 = _mm_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i rmin = _mm_min_epu8( min4, min5 );
+    __m128i rmax = _mm_max_epu8( max4, max5 );
+    // range1 = per-byte span; range2 = spans summed over the low 8 bytes (two identical lanes), hence the >> 1 below.
+    __m128i range1 = _mm_subs_epu8( rmax, rmin );
+    __m128i range2 = _mm_sad_epu8( rmax, rmin );
+
+    uint32_t vrange = _mm_cvtsi128_si32( range2 ) >> 1;
+    __m128i range = _mm_set1_epi16( DivTable[vrange] ); // scale factor selected by the summed span
+    // Move both endpoints inward by span/16 per byte (16-bit shift, then mask each byte to its own 4 bits).
+    __m128i inset1 = _mm_srli_epi16( range1, 4 );
+    __m128i inset = _mm_and_si128( inset1, _mm_set1_epi8( 0xF ) );
+    __m128i min = _mm_adds_epu8( rmin, inset );
+    __m128i max = _mm_subs_epu8( rmax, inset );
+    // Per pixel: saturating distance from rmin for every byte, then summed over the pixel's four bytes.
+    __m128i c0 = _mm_subs_epu8( px0, rmin );
+    __m128i c1 = _mm_subs_epu8( px1, rmin );
+    __m128i c2 = _mm_subs_epu8( px2, rmin );
+    __m128i c3 = _mm_subs_epu8( px3, rmin );
+
+    __m128i is0 = _mm_maddubs_epi16( c0, _mm_set1_epi8( 1 ) ); // multiply by 1 and add adjacent bytes: two 16-bit partial sums per pixel
+    __m128i is1 = _mm_maddubs_epi16( c1, _mm_set1_epi8( 1 ) );
+    __m128i is2 = _mm_maddubs_epi16( c2, _mm_set1_epi8( 1 ) );
+    __m128i is3 = _mm_maddubs_epi16( c3, _mm_set1_epi8( 1 ) );
+
+    __m128i s0 = _mm_hadd_epi16( is0, is1 ); // one 16-bit sum per pixel
+    __m128i s1 = _mm_hadd_epi16( is2, is3 );
+    // Scale: keep the high 16 bits of sum * range.
+    __m128i m0 = _mm_mulhi_epu16( s0, range );
+    __m128i m1 = _mm_mulhi_epu16( s1, range );
+
+    __m128i p0 = _mm_packus_epi16( m0, m1 ); // one byte per pixel
+    // Fold the four bytes of each 32-bit lane into its low byte, 2 bits per pixel (assumes each byte is 0-3 - TODO confirm), then gather those low bytes.
+    __m128i p1 = _mm_or_si128( _mm_srai_epi32( p0, 6 ), _mm_srai_epi32( p0, 12 ) );
+    __m128i p2 = _mm_or_si128( _mm_srai_epi32( p0, 18 ), p0 );
+    __m128i p3 = _mm_or_si128( p1, p2 );
+    __m128i p =_mm_shuffle_epi8( p3, _mm_set1_epi32( 0x0C080400 ) );
+
+    uint32_t vmin = _mm_cvtsi128_si32( min );
+    uint32_t vmax = _mm_cvtsi128_si32( max );
+    uint32_t vp = _mm_cvtsi128_si32( p );
+
+    return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
+#elif defined __ARM_NEON
+#  ifdef __aarch64__
+    uint8x16x4_t px = vld4q_u8( src ); // de-interleaving load: val[0], val[1], val[2] hold byte 0, 1, 2 of all 16 pixels
+
+    uint8x16_t lr = px.val[0];
+    uint8x16_t lg = px.val[1];
+    uint8x16_t lb = px.val[2];
+    // Horizontal maximum and minimum of each channel across the block (this branch has no solid-block shortcut).
+    uint8_t rmaxr = vmaxvq_u8( lr );
+    uint8_t rmaxg = vmaxvq_u8( lg );
+    uint8_t rmaxb = vmaxvq_u8( lb );
+
+    uint8_t rminr = vminvq_u8( lr );
+    uint8_t rming = vminvq_u8( lg );
+    uint8_t rminb = vminvq_u8( lb );
+    // Span of each channel; their sum selects the scale factor.
+    int rr = rmaxr - rminr;
+    int rg = rmaxg - rming;
+    int rb = rmaxb - rminb;
+
+    int vrange1 = rr + rg + rb;
+    uint16_t vrange2 = DivTableNEON[vrange1];
+    // Endpoints moved inward by span/16 per channel.
+    uint8_t insetr = rr >> 4;
+    uint8_t insetg = rg >> 4;
+    uint8_t insetb = rb >> 4;
+
+    uint8_t minr = rminr + insetr;
+    uint8_t ming = rming + insetg;
+    uint8_t minb = rminb + insetb;
+
+    uint8_t maxr = rmaxr - insetr;
+    uint8_t maxg = rmaxg - insetg;
+    uint8_t maxb = rmaxb - insetb;
+    // Distance of every pixel from the channel minimum, summed over the three channels as 16-bit values.
+    uint8x16_t cr = vsubq_u8( lr, vdupq_n_u8( rminr ) );
+    uint8x16_t cg = vsubq_u8( lg, vdupq_n_u8( rming ) );
+    uint8x16_t cb = vsubq_u8( lb, vdupq_n_u8( rminb ) );
+
+    uint16x8_t is0l = vaddl_u8( vget_low_u8( cr ), vget_low_u8( cg ) );
+    uint16x8_t is0h = vaddl_u8( vget_high_u8( cr ), vget_high_u8( cg ) );
+    uint16x8_t is1l = vaddw_u8( is0l, vget_low_u8( cb ) );
+    uint16x8_t is1h = vaddw_u8( is0h, vget_high_u8( cb ) );
+    // vqdmulhq_s16 keeps the high half of the doubled product: ( 2 * sum * range ) >> 16.
+    int16x8_t range = vdupq_n_s16( vrange2 );
+    uint16x8_t m0 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( is1l ), range ) );
+    uint16x8_t m1 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( is1h ), range ) );
+    // Narrow to one byte per pixel.
+    uint8x8_t p00 = vmovn_u16( m0 );
+    uint8x8_t p01 = vmovn_u16( m1 );
+    uint8x16_t p0 = vcombine_u8( p00, p01 );
+    // Fold the four bytes of each 32-bit lane into its low byte at 2 bits apiece (the adds act as ORs if each byte is 0-3 - TODO confirm), then gather the low bytes.
+    uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) );
+    uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) );
+    uint32x4_t p3 = vaddq_u32( p1, p2 );
+
+    uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) );
+    uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) );
+
+    uint32_t vp;
+    vst1_lane_u32( &vp, vreinterpret_u32_u8( p.val[0] ), 0 );
+
+    return uint64_t( ( uint64_t( to565( minr, ming, minb ) ) << 16 ) | to565( maxr, maxg, maxb ) | ( uint64_t( vp ) << 32 ) );
+#  else
+    uint32x4_t px0 = vld1q_u32( (uint32_t*)src ); // px0..px3: four loads of four 32-bit pixels each
+    uint32x4_t px1 = vld1q_u32( (uint32_t*)src + 4 );
+    uint32x4_t px2 = vld1q_u32( (uint32_t*)src + 8 );
+    uint32x4_t px3 = vld1q_u32( (uint32_t*)src + 12 );
+    // Solid-block shortcut: compare every pixel, masked to the bits to565 keeps, against pixel 0.
+    uint32x4_t smask = vdupq_n_u32( 0xF8FCF8 );
+    uint32x4_t sd0 = vandq_u32( smask, px0 );
+    uint32x4_t sd1 = vandq_u32( smask, px1 );
+    uint32x4_t sd2 = vandq_u32( smask, px2 );
+    uint32x4_t sd3 = vandq_u32( smask, px3 );
+
+    uint32x4_t sc = vdupq_n_u32( sd0[0] );
+
+    uint32x4_t sc0 = vceqq_u32( sd0, sc );
+    uint32x4_t sc1 = vceqq_u32( sd1, sc );
+    uint32x4_t sc2 = vceqq_u32( sd2, sc );
+    uint32x4_t sc3 = vceqq_u32( sd3, sc );
+
+    uint32x4_t sm0 = vandq_u32( sc0, sc1 );
+    uint32x4_t sm1 = vandq_u32( sc2, sc3 );
+    int64x2_t sm = vreinterpretq_s64_u32( vandq_u32( sm0, sm1 ) );
+
+    if( sm[0] == -1 && sm[1] == -1 ) // all 128 mask bits set: every pixel matched
+    {
+        return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
+    }
+    // Clear byte 3 of every pixel before the min/max search.
+    uint32x4_t mask = vdupq_n_u32( 0xFFFFFF );
+    uint8x16_t l0 = vreinterpretq_u8_u32( vandq_u32( mask, px0 ) );
+    uint8x16_t l1 = vreinterpretq_u8_u32( vandq_u32( mask, px1 ) );
+    uint8x16_t l2 = vreinterpretq_u8_u32( vandq_u32( mask, px2 ) );
+    uint8x16_t l3 = vreinterpretq_u8_u32( vandq_u32( mask, px3 ) );
+    // Per-byte minimum and maximum over all 16 pixels, reduced until every 32-bit lane holds the result.
+    uint8x16_t min0 = vminq_u8( l0, l1 );
+    uint8x16_t min1 = vminq_u8( l2, l3 );
+    uint8x16_t min2 = vminq_u8( min0, min1 );
+
+    uint8x16_t max0 = vmaxq_u8( l0, l1 );
+    uint8x16_t max1 = vmaxq_u8( l2, l3 );
+    uint8x16_t max2 = vmaxq_u8( max0, max1 );
+
+    uint8x16_t min3 = vreinterpretq_u8_u32( vrev64q_u32( vreinterpretq_u32_u8( min2 ) ) );
+    uint8x16_t max3 = vreinterpretq_u8_u32( vrev64q_u32( vreinterpretq_u32_u8( max2 ) ) );
+
+    uint8x16_t min4 = vminq_u8( min2, min3 );
+    uint8x16_t max4 = vmaxq_u8( max2, max3 );
+
+    uint8x16_t min5 = vcombine_u8( vget_high_u8( min4 ), vget_low_u8( min4 ) );
+    uint8x16_t max5 = vcombine_u8( vget_high_u8( max4 ), vget_low_u8( max4 ) );
+
+    uint8x16_t rmin = vminq_u8( min4, min5 );
+    uint8x16_t rmax = vmaxq_u8( max4, max5 );
+    // Widen the spans of one lane's four bytes to 16 bits and add them up into vrange1.
+    uint8x16_t range1 = vsubq_u8( rmax, rmin );
+    uint8x8_t range2 = vget_low_u8( range1 );
+    uint8x8x2_t range3 = vzip_u8( range2, vdup_n_u8( 0 ) );
+    uint16x4_t range4 = vreinterpret_u16_u8( range3.val[0] );
+
+    uint16_t vrange1;
+    uint16x4_t range5 = vpadd_u16( range4, range4 );
+    uint16x4_t range6 = vpadd_u16( range5, range5 );
+    vst1_lane_u16( &vrange1, range6, 0 );
+
+    uint32_t vrange2 = ( 2 << 16 ) / uint32_t( vrange1 + 1 ); // scale factor computed directly rather than read from a table
+    uint16x8_t range = vdupq_n_u16( vrange2 );
+    // Endpoints moved inward by span/16 per byte.
+    uint8x16_t inset = vshrq_n_u8( range1, 4 );
+    uint8x16_t min = vaddq_u8( rmin, inset );
+    uint8x16_t max = vsubq_u8( rmax, inset );
+    // Per pixel: distance from rmin for every byte, then summed over the pixel's four bytes.
+    uint8x16_t c0 = vsubq_u8( l0, rmin );
+    uint8x16_t c1 = vsubq_u8( l1, rmin );
+    uint8x16_t c2 = vsubq_u8( l2, rmin );
+    uint8x16_t c3 = vsubq_u8( l3, rmin );
+
+    uint16x8_t is0 = vpaddlq_u8( c0 ); // widening pairwise add: two 16-bit partial sums per pixel
+    uint16x8_t is1 = vpaddlq_u8( c1 );
+    uint16x8_t is2 = vpaddlq_u8( c2 );
+    uint16x8_t is3 = vpaddlq_u8( c3 );
+
+    uint16x4_t is4 = vpadd_u16( vget_low_u16( is0 ), vget_high_u16( is0 ) ); // one 16-bit sum per pixel
+    uint16x4_t is5 = vpadd_u16( vget_low_u16( is1 ), vget_high_u16( is1 ) );
+    uint16x4_t is6 = vpadd_u16( vget_low_u16( is2 ), vget_high_u16( is2 ) );
+    uint16x4_t is7 = vpadd_u16( vget_low_u16( is3 ), vget_high_u16( is3 ) );
+
+    uint16x8_t s0 = vcombine_u16( is4, is5 );
+    uint16x8_t s1 = vcombine_u16( is6, is7 );
+    // vqdmulhq_s16 keeps the high half of the doubled product: ( 2 * sum * range ) >> 16.
+    uint16x8_t m0 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( s0 ), vreinterpretq_s16_u16( range ) ) );
+    uint16x8_t m1 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( s1 ), vreinterpretq_s16_u16( range ) ) );
+    // Narrow to one byte per pixel.
+    uint8x8_t p00 = vmovn_u16( m0 );
+    uint8x8_t p01 = vmovn_u16( m1 );
+    uint8x16_t p0 = vcombine_u8( p00, p01 );
+    // Fold the four bytes of each 32-bit lane into its low byte at 2 bits apiece (the adds act as ORs if each byte is 0-3 - TODO confirm), then gather the low bytes.
+    uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) );
+    uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) );
+    uint32x4_t p3 = vaddq_u32( p1, p2 );
+
+    uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) );
+    uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) );
+
+    uint32_t vmin, vmax, vp;
+    vst1q_lane_u32( &vmin, vreinterpretq_u32_u8( min ), 0 );
+    vst1q_lane_u32( &vmax, vreinterpretq_u32_u8( max ), 0 );
+    vst1_lane_u32( &vp, vreinterpret_u32_u8( p.val[0] ), 0 );
+
+    return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
+#  endif
+#else
+    uint32_t ref; // portable branch: pixel 0 is the reference for the solid-block test
+    memcpy( &ref, src, 4 );
+    uint32_t refMask = ref & 0xF8FCF8; // keep only the bits to565 preserves
+    auto stmp = src + 4;
+    for( int i=1; i<16; i++ )
+    {
+        uint32_t px;
+        memcpy( &px, stmp, 4 );
+        if( ( px & 0xF8FCF8 ) != refMask ) break; // first differing pixel ends the scan early
+        stmp += 4;
+    }
+    if( stmp == src + 64 ) // the scan reached the end: all 16 pixels matched
+    {
+        return uint64_t( to565( ref ) ) << 16;
+    }
+    // Minimum and maximum of bytes 0-2 over the block, seeded from pixel 0.
+    uint8_t min[3] = { src[0], src[1], src[2] };
+    uint8_t max[3] = { src[0], src[1], src[2] };
+    auto tmp = src + 4;
+    for( int i=1; i<16; i++ )
+    {
+        for( int j=0; j<3; j++ )
+        {
+            if( tmp[j] < min[j] ) min[j] = tmp[j];
+            else if( tmp[j] > max[j] ) max[j] = tmp[j];
+        }
+        tmp += 4;
+    }
+    // range: scale factor selected by the summed span; rmin: sum of the minima, taken before the inset below.
+    const uint32_t range = DivTable[max[0] - min[0] + max[1] - min[1] + max[2] - min[2]];
+    const uint32_t rmin = min[0] + min[1] + min[2];
+    for( int i=0; i<3; i++ )
+    {
+        const uint8_t inset = ( max[i] - min[i] ) >> 4; // move each endpoint inward by span/16
+        min[i] += inset;
+        max[i] -= inset;
+    }
+    // The index of pixel i is ORed into data at bit 2*i.
+    uint32_t data = 0;
+    for( int i=0; i<16; i++ )
+    {
+        const uint32_t c = src[0] + src[1] + src[2] - rmin; // summed distance of this pixel above the channel minima
+        const uint8_t idx = ( c * range ) >> 16;
+        data |= idx << (i*2);
+        src += 4; // advance to the next pixel
+    }
+
+    return uint64_t( ( uint64_t( to565( min[0], min[1], min[2] ) ) << 16 ) | to565( max[0], max[1], max[2] ) | ( uint64_t( data ) << 32 ) );
+#endif
+}
+
+#ifdef __AVX2__
+// Encodes two horizontally adjacent 4x4 blocks with one pass of 256-bit operations.
+//
+// src: 4 rows of 8 RGBA pixels (128 bytes, read with unaligned loads). In every row register the low
+//      128-bit lane belongs to the first block and the high lane to the second one.
+// dst: receives 16 bytes (two 64-bit blocks) and is advanced by 16. Unless both blocks are solid, the
+//      selector bytes are remapped through DxtcIndexTable before returning.
+static etcpak_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
+{
+    __m256i px0 = _mm256_loadu_si256(((__m256i*)src) + 0);
+    __m256i px1 = _mm256_loadu_si256(((__m256i*)src) + 1);
+    __m256i px2 = _mm256_loadu_si256(((__m256i*)src) + 2);
+    __m256i px3 = _mm256_loadu_si256(((__m256i*)src) + 3);
+
+    // Solid-color test: keep only the bits that survive 5:6:5 truncation (alpha is dropped by the mask)
+    // and compare every pixel with the first pixel of its own 128-bit lane.
+    __m256i smask = _mm256_set1_epi32( 0xF8FCF8 );
+    __m256i sd0 = _mm256_and_si256( px0, smask );
+    __m256i sd1 = _mm256_and_si256( px1, smask );
+    __m256i sd2 = _mm256_and_si256( px2, smask );
+    __m256i sd3 = _mm256_and_si256( px3, smask );
+
+    __m256i sc = _mm256_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
+
+    __m256i sc0 = _mm256_cmpeq_epi8(sd0, sc);
+    __m256i sc1 = _mm256_cmpeq_epi8(sd1, sc);
+    __m256i sc2 = _mm256_cmpeq_epi8(sd2, sc);
+    __m256i sc3 = _mm256_cmpeq_epi8(sd3, sc);
+
+    __m256i sm0 = _mm256_and_si256(sc0, sc1);
+    __m256i sm1 = _mm256_and_si256(sc2, sc3);
+    __m256i sm = _mm256_and_si256(sm0, sm1);
+
+    // Note the inverted sense: solidN is 0 when block N is solid and 1 when it is not.
+    const int64_t solid0 = 1 - _mm_testc_si128( _mm256_castsi256_si128( sm ), _mm_set1_epi32( -1 ) );
+    const int64_t solid1 = 1 - _mm_testc_si128( _mm256_extracti128_si256( sm, 1 ), _mm_set1_epi32( -1 ) );
+
+    if( solid0 + solid1 == 0 )
+    {
+        // Both blocks are solid: emit the 565 color in the low 16 bits with all-zero selectors.
+        // src[16] is the first pixel of the second block (pixel 4 of row 0).
+        const auto c0 = uint64_t( to565( src[0], src[1], src[2] ) );
+        const auto c1 = uint64_t( to565( src[16], src[17], src[18] ) );
+        memcpy( dst, &c0, 8 );
+        memcpy( dst+8, &c1, 8 );
+        dst += 16;
+        return;
+    }
+
+    // Drop the alpha byte before any range / selector math. The scalar fallback of ProcessRGB only sums
+    // the three color channels when indexing DivTable, and ProcessRGB_SSE masks alpha the same way.
+    // Without this, non-uniform alpha would be added into the SAD-based range (pushing the DivTable
+    // index beyond what three channels can produce) and into the per-pixel sums used for the selectors.
+    // For input whose alpha is constant the result is unchanged.
+    const __m256i rgbMask = _mm256_set1_epi32( 0xFFFFFF );
+    px0 = _mm256_and_si256( px0, rgbMask );
+    px1 = _mm256_and_si256( px1, rgbMask );
+    px2 = _mm256_and_si256( px2, rgbMask );
+    px3 = _mm256_and_si256( px3, rgbMask );
+
+    // Per-channel min / max over the four rows ...
+    __m256i min0 = _mm256_min_epu8( px0, px1 );
+    __m256i min1 = _mm256_min_epu8( px2, px3 );
+    __m256i min2 = _mm256_min_epu8( min0, min1 );
+
+    __m256i max0 = _mm256_max_epu8( px0, px1 );
+    __m256i max1 = _mm256_max_epu8( px2, px3 );
+    __m256i max2 = _mm256_max_epu8( max0, max1 );
+
+    // ... then across the four pixels of each lane; afterwards every 32-bit element of a lane holds
+    // that block's per-channel minimum (rmin) or maximum (rmax).
+    __m256i min3 = _mm256_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m256i max3 = _mm256_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m256i min4 = _mm256_min_epu8( min2, min3 );
+    __m256i max4 = _mm256_max_epu8( max2, max3 );
+
+    __m256i min5 = _mm256_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m256i max5 = _mm256_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m256i rmin = _mm256_min_epu8( min4, min5 );
+    __m256i rmax = _mm256_max_epu8( max4, max5 );
+
+    // range1: per-channel range. range2: SAD over each 8-byte group, i.e. over two identical pixels,
+    // so the summed channel range is the SAD value shifted right by one.
+    __m256i range1 = _mm256_subs_epu8( rmax, rmin );
+    __m256i range2 = _mm256_sad_epu8( rmax, rmin );
+
+    // One DivTable multiplier per block: 16-bit element 0 is the first block, element 8 the second.
+    uint16_t vrange0 = DivTable[_mm256_cvtsi256_si32( range2 ) >> 1];
+    uint16_t vrange1 = DivTable[_mm256_extract_epi16( range2, 8 ) >> 1];
+    __m256i range00 = _mm256_set1_epi16( vrange0 );
+    __m256i range = _mm256_inserti128_si256( range00, _mm_set1_epi16( vrange1 ), 1 );
+
+    // Pull both endpoints inwards by range/16 per channel. The 16-bit shift leaks bits of the
+    // neighbouring byte into the top nibble, which the 0x0F mask removes.
+    __m256i inset1 = _mm256_srli_epi16( range1, 4 );
+    __m256i inset = _mm256_and_si256( inset1, _mm256_set1_epi8( 0xF ) );
+    __m256i min = _mm256_adds_epu8( rmin, inset );
+    __m256i max = _mm256_subs_epu8( rmax, inset );
+
+    // Per-pixel distance from the minimum: subtract per channel, then sum the channel bytes of each
+    // pixel (maddubs adds byte pairs, hadd adds the two resulting words).
+    __m256i c0 = _mm256_subs_epu8( px0, rmin );
+    __m256i c1 = _mm256_subs_epu8( px1, rmin );
+    __m256i c2 = _mm256_subs_epu8( px2, rmin );
+    __m256i c3 = _mm256_subs_epu8( px3, rmin );
+
+    __m256i is0 = _mm256_maddubs_epi16( c0, _mm256_set1_epi8( 1 ) );
+    __m256i is1 = _mm256_maddubs_epi16( c1, _mm256_set1_epi8( 1 ) );
+    __m256i is2 = _mm256_maddubs_epi16( c2, _mm256_set1_epi8( 1 ) );
+    __m256i is3 = _mm256_maddubs_epi16( c3, _mm256_set1_epi8( 1 ) );
+
+    __m256i s0 = _mm256_hadd_epi16( is0, is1 );
+    __m256i s1 = _mm256_hadd_epi16( is2, is3 );
+
+    // Selector = ( distance * multiplier ) >> 16, the same formula as the scalar path.
+    __m256i m0 = _mm256_mulhi_epu16( s0, range );
+    __m256i m1 = _mm256_mulhi_epu16( s1, range );
+
+    __m256i p0 = _mm256_packus_epi16( m0, m1 );
+
+    // Squeeze the four byte-sized selectors of every 32-bit element into its low byte (2 bits each),
+    // then gather those low bytes so each 32-bit element holds the 16 selectors of its block.
+    __m256i p1 = _mm256_or_si256( _mm256_srai_epi32( p0, 6 ), _mm256_srai_epi32( p0, 12 ) );
+    __m256i p2 = _mm256_or_si256( _mm256_srai_epi32( p0, 18 ), p0 );
+    __m256i p3 = _mm256_or_si256( p1, p2 );
+    __m256i p =_mm256_shuffle_epi8( p3, _mm256_set1_epi32( 0x0C080400 ) );
+
+    // Convert the inset endpoints to 5:6:5. Each channel byte is placed in the high byte of a 16-bit
+    // slot, the R/G/B fields are shifted into position, and the low 16 bits of each 64-bit group are
+    // collected (max first, then min).
+    __m256i mm0 = _mm256_unpacklo_epi8( _mm256_setzero_si256(), min );
+    __m256i mm1 = _mm256_unpacklo_epi8( _mm256_setzero_si256(), max );
+    __m256i mm2 = _mm256_unpacklo_epi64( mm1, mm0 );
+    __m256i mmr = _mm256_slli_epi64( _mm256_srli_epi64( mm2, 11 ), 11 );
+    __m256i mmg = _mm256_slli_epi64( _mm256_srli_epi64( mm2, 26 ), 5 );
+    __m256i mmb = _mm256_srli_epi64( _mm256_slli_epi64( mm2, 16 ), 59 );
+    __m256i mm3 = _mm256_or_si256( mmr, mmg );
+    __m256i mm4 = _mm256_or_si256( mm3, mmb );
+    __m256i mm5 = _mm256_shuffle_epi8( mm4, _mm256_set1_epi32( 0x09080100 ) );
+
+    // Interleave endpoints with selectors and bring both 64-bit blocks into the low 128 bits.
+    __m256i d0 = _mm256_unpacklo_epi32( mm5, p );
+    __m256i d1 = _mm256_permute4x64_epi64( d0, _MM_SHUFFLE( 3, 2, 2, 0 ) );
+    __m128i d2 = _mm256_castsi256_si128( d1 );
+
+    // -solidN is all ones for a non-solid block (keep everything) and 0 for a solid one, in which case
+    // only bits 16..31 survive and the other endpoint and the selectors are cleared.
+    __m128i mask = _mm_set_epi64x( 0xFFFF0000 | -solid1, 0xFFFF0000 | -solid0 );
+    __m128i d3 = _mm_and_si128( d2, mask );
+    _mm_storeu_si128( (__m128i*)dst, d3 );
+
+    // Remap the selector bytes (bytes 4..7 of each block) through DxtcIndexTable.
+    for( int j=4; j<8; j++ ) dst[j] = (char)DxtcIndexTable[(uint8_t)dst[j]];
+    for( int j=12; j<16; j++ ) dst[j] = (char)DxtcIndexTable[(uint8_t)dst[j]];
+
+    dst += 16;
+}
+#endif
+
+// Maps the linear selector 0..7 (0 = closest to the block minimum) to the 3-bit code stored in the block.
+static const uint8_t AlphaIndexTable[8] = { 1, 7, 6, 5, 4, 3, 2, 0 };
+
+// Encodes 16 alpha bytes into one 64-bit alpha block.
+//
+// Result layout: bits 0..7 = maximum, bits 8..15 = minimum, bits 16..63 = sixteen 3-bit selectors.
+// A block whose 16 values are all equal is returned as just that value in the low byte.
+static etcpak_force_inline uint64_t ProcessAlpha( const uint8_t* src )
+{
+    const uint8_t first = src[0];
+
+    // Uniform block: every value equals the first one.
+    bool uniform = true;
+    for( int k=1; k<16; k++ )
+    {
+        if( src[k] != first )
+        {
+            uniform = false;
+            break;
+        }
+    }
+    if( uniform ) return first;
+
+    // Value range of the block.
+    uint8_t lo = first;
+    uint8_t hi = first;
+    for( int k=1; k<16; k++ )
+    {
+        const uint8_t value = src[k];
+        if( value < lo ) lo = value;
+        if( value > hi ) hi = value;
+    }
+
+    // Fixed-point (13 fractional bits) factor that scales 0..(hi-lo) onto the selectors 0..7.
+    const uint32_t scale = ( 8 << 13 ) / ( 1 + hi - lo );
+
+    uint64_t selectors = 0;
+    for( int k=0; k<16; k++ )
+    {
+        const uint8_t delta = src[k] - lo;
+        const uint64_t code = AlphaIndexTable[( delta * scale ) >> 13];
+        selectors |= code << (k*3);
+    }
+
+    return uint64_t( hi ) | ( uint64_t( lo ) << 8 ) | ( selectors << 16 );
+}
+
+#ifdef __SSE4_1__
+// Encodes the color part of one 4x4 block from four registers of four 32-bit pixels each
+// (CompressDxt5 passes one block row per register).
+//
+// Returns: bits 0..15 = 565 maximum endpoint, bits 16..31 = 565 minimum endpoint, bits 32..63 = sixteen
+// 2-bit selectors. The selectors are not yet remapped through DxtcIndexTable; the caller does that.
+// A solid block is returned as its 565 color in bits 16..31 with everything else zero.
+static etcpak_force_inline uint64_t ProcessRGB_SSE( __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
+{
+    // Solid-color test on the bits that survive 5:6:5 truncation; the mask also drops alpha.
+    __m128i smask = _mm_set1_epi32( 0xF8FCF8 );
+    __m128i sd0 = _mm_and_si128( px0, smask );
+    __m128i sd1 = _mm_and_si128( px1, smask );
+    __m128i sd2 = _mm_and_si128( px2, smask );
+    __m128i sd3 = _mm_and_si128( px3, smask );
+
+    // Broadcast the first pixel and compare every pixel against it.
+    __m128i sc = _mm_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
+
+    __m128i sc0 = _mm_cmpeq_epi8(sd0, sc);
+    __m128i sc1 = _mm_cmpeq_epi8(sd1, sc);
+    __m128i sc2 = _mm_cmpeq_epi8(sd2, sc);
+    __m128i sc3 = _mm_cmpeq_epi8(sd3, sc);
+
+    __m128i sm0 = _mm_and_si128(sc0, sc1);
+    __m128i sm1 = _mm_and_si128(sc2, sc3);
+    __m128i sm = _mm_and_si128(sm0, sm1);
+
+    // testc against all ones is non-zero only when every compare byte is 0xFF.
+    if( _mm_testc_si128(sm, _mm_set1_epi32(-1)) )
+    {
+        return uint64_t( to565( _mm_cvtsi128_si32( px0 ) ) ) << 16;
+    }
+
+    // Clear alpha so it cannot influence the range or the selectors below.
+    px0 = _mm_and_si128( px0, _mm_set1_epi32( 0xFFFFFF ) );
+    px1 = _mm_and_si128( px1, _mm_set1_epi32( 0xFFFFFF ) );
+    px2 = _mm_and_si128( px2, _mm_set1_epi32( 0xFFFFFF ) );
+    px3 = _mm_and_si128( px3, _mm_set1_epi32( 0xFFFFFF ) );
+
+    // Per-channel min / max over the four registers ...
+    __m128i min0 = _mm_min_epu8( px0, px1 );
+    __m128i min1 = _mm_min_epu8( px2, px3 );
+    __m128i min2 = _mm_min_epu8( min0, min1 );
+
+    __m128i max0 = _mm_max_epu8( px0, px1 );
+    __m128i max1 = _mm_max_epu8( px2, px3 );
+    __m128i max2 = _mm_max_epu8( max0, max1 );
+
+    // ... then across the four pixels; afterwards every 32-bit element of rmin / rmax holds the
+    // per-channel minimum / maximum of the whole block.
+    __m128i min3 = _mm_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i max3 = _mm_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i min4 = _mm_min_epu8( min2, min3 );
+    __m128i max4 = _mm_max_epu8( max2, max3 );
+
+    __m128i min5 = _mm_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i max5 = _mm_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i rmin = _mm_min_epu8( min4, min5 );
+    __m128i rmax = _mm_max_epu8( max4, max5 );
+
+    // range1: per-channel range. range2: SAD over 8 bytes, i.e. over two identical pixels, hence the
+    // shift by one below to obtain the summed channel range.
+    __m128i range1 = _mm_subs_epu8( rmax, rmin );
+    __m128i range2 = _mm_sad_epu8( rmax, rmin );
+
+    uint32_t vrange = _mm_cvtsi128_si32( range2 ) >> 1;
+    __m128i range = _mm_set1_epi16( DivTable[vrange] );
+
+    // Pull both endpoints inwards by range/16 per channel. The 16-bit shift leaks bits of the
+    // neighbouring byte into the top nibble, which the 0x0F mask removes.
+    __m128i inset1 = _mm_srli_epi16( range1, 4 );
+    __m128i inset = _mm_and_si128( inset1, _mm_set1_epi8( 0xF ) );
+    __m128i min = _mm_adds_epu8( rmin, inset );
+    __m128i max = _mm_subs_epu8( rmax, inset );
+
+    // Per-pixel distance from the minimum: subtract per channel, then sum the channel bytes of each
+    // pixel (maddubs adds byte pairs, hadd adds the two resulting words).
+    __m128i c0 = _mm_subs_epu8( px0, rmin );
+    __m128i c1 = _mm_subs_epu8( px1, rmin );
+    __m128i c2 = _mm_subs_epu8( px2, rmin );
+    __m128i c3 = _mm_subs_epu8( px3, rmin );
+
+    __m128i is0 = _mm_maddubs_epi16( c0, _mm_set1_epi8( 1 ) );
+    __m128i is1 = _mm_maddubs_epi16( c1, _mm_set1_epi8( 1 ) );
+    __m128i is2 = _mm_maddubs_epi16( c2, _mm_set1_epi8( 1 ) );
+    __m128i is3 = _mm_maddubs_epi16( c3, _mm_set1_epi8( 1 ) );
+
+    __m128i s0 = _mm_hadd_epi16( is0, is1 );
+    __m128i s1 = _mm_hadd_epi16( is2, is3 );
+
+    // Selector = ( distance * DivTable[range] ) >> 16.
+    __m128i m0 = _mm_mulhi_epu16( s0, range );
+    __m128i m1 = _mm_mulhi_epu16( s1, range );
+
+    // One selector per byte, in pixel order.
+    __m128i p0 = _mm_packus_epi16( m0, m1 );
+
+    // Squeeze the four byte-sized selectors of every 32-bit element into its low byte (2 bits each),
+    // then gather those four low bytes into a single 32-bit selector word.
+    __m128i p1 = _mm_or_si128( _mm_srai_epi32( p0, 6 ), _mm_srai_epi32( p0, 12 ) );
+    __m128i p2 = _mm_or_si128( _mm_srai_epi32( p0, 18 ), p0 );
+    __m128i p3 = _mm_or_si128( p1, p2 );
+    __m128i p =_mm_shuffle_epi8( p3, _mm_set1_epi32( 0x0C080400 ) );
+
+    // Pull the inset endpoints and the selector word out of the vector registers.
+    uint32_t vmin = _mm_cvtsi128_si32( min );
+    uint32_t vmax = _mm_cvtsi128_si32( max );
+    uint32_t vp = _mm_cvtsi128_si32( p );
+
+    return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
+}
+
+// Encodes the alpha channel of one 4x4 block from four registers of four 32-bit pixels each.
+//
+// Returns: bits 0..7 = maximum alpha, bits 8..15 = minimum alpha, bits 16..63 = selector data built
+// from AlphaIndexTable_SSE. A block with uniform alpha is returned as just that value in the low byte.
+static etcpak_force_inline uint64_t ProcessAlpha_SSE( __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
+{
+    // Shuffle control that moves bytes 3, 7, 11 and 15 (the top byte of every pixel) into the first
+    // 32-bit element; the -1 entries have their high bit set and therefore produce zero bytes.
+    __m128i mask = _mm_setr_epi32( 0x0f0b0703, -1, -1, -1 );
+
+    // Rotate the control so each register's alpha bytes land in a different 32-bit element, then OR
+    // the four partial results: `a` ends up holding the 16 alpha bytes in pixel order.
+    __m128i m0 = _mm_shuffle_epi8( px0, mask );
+    __m128i m1 = _mm_shuffle_epi8( px1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
+    __m128i m2 = _mm_shuffle_epi8( px2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
+    __m128i m3 = _mm_shuffle_epi8( px3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
+    __m128i m4 = _mm_or_si128( m0, m1 );
+    __m128i m5 = _mm_or_si128( m2, m3 );
+    __m128i a = _mm_or_si128( m4, m5 );
+
+    // Uniform test: broadcast byte 0 and check that all 16 bytes match it.
+    __m128i solidCmp = _mm_shuffle_epi8( a, _mm_setzero_si128() );
+    __m128i cmpRes = _mm_cmpeq_epi8( a, solidCmp );
+    if( _mm_testc_si128( cmpRes, _mm_set1_epi32( -1 ) ) )
+    {
+        return _mm_cvtsi128_si32( a ) & 0xFF;
+    }
+
+    // Horizontal min / max over the 16 bytes: fold 32-bit elements pairwise twice, then fold the
+    // bytes inside an element by rotating 2 bytes and 1 byte. Afterwards every byte of `max` / `min`
+    // holds the block maximum / minimum.
+    __m128i a1 = _mm_shuffle_epi32( a, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i max1 = _mm_max_epu8( a, a1 );
+    __m128i min1 = _mm_min_epu8( a, a1 );
+    __m128i amax2 = _mm_shuffle_epi32( max1, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i amin2 = _mm_shuffle_epi32( min1, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i max2 = _mm_max_epu8( max1, amax2 );
+    __m128i min2 = _mm_min_epu8( min1, amin2 );
+    __m128i amax3 = _mm_alignr_epi8( max2, max2, 2 );
+    __m128i amin3 = _mm_alignr_epi8( min2, min2, 2 );
+    __m128i max3 = _mm_max_epu8( max2, amax3 );
+    __m128i min3 = _mm_min_epu8( min2, amin3 );
+    __m128i amax4 = _mm_alignr_epi8( max3, max3, 1 );
+    __m128i amin4 = _mm_alignr_epi8( min3, min3, 1 );
+    __m128i max = _mm_max_epu8( max3, amax4 );
+    __m128i min = _mm_min_epu8( min3, amin4 );
+    // Byte 0 = max, byte 1 = min: the two endpoint bytes of the result.
+    __m128i minmax = _mm_unpacklo_epi8( max, min );
+
+    // Look up the 16-bit multiplier for this block's alpha range.
+    __m128i r = _mm_sub_epi8( max, min );
+    int range = _mm_cvtsi128_si32( r ) & 0xFF;
+    __m128i rv = _mm_set1_epi16( DivTableAlpha[range] );
+
+    // Distance of every alpha value from the minimum.
+    __m128i v = _mm_sub_epi8( a, min );
+
+    // Widen to 16 bits and keep the high half of distance * multiplier as the linear selector.
+    __m128i lo16 = _mm_unpacklo_epi8( v, _mm_setzero_si128() );
+    __m128i hi16 = _mm_unpackhi_epi8( v, _mm_setzero_si128() );
+
+    __m128i lomul = _mm_mulhi_epu16( lo16, rv );
+    __m128i himul = _mm_mulhi_epu16( hi16, rv );
+
+    // Back to one selector per byte, then merge each pair of neighbouring selectors into one 6-bit
+    // value (even pixel in bits 0..2, odd pixel in bits 3..5), giving 8 bytes in the low half of p2.
+    __m128i p0 = _mm_packus_epi16( lomul, himul );
+    __m128i p1 = _mm_or_si128( _mm_and_si128( p0, _mm_set1_epi16( 0x3F ) ), _mm_srai_epi16( _mm_and_si128( p0, _mm_set1_epi16( 0x3F00 ) ), 5 ) );
+    __m128i p2 = _mm_packus_epi16( p1, p1 );
+
+    // Translate every 6-bit pair through AlphaIndexTable_SSE and append 6 bits per pair (48 in total).
+    uint64_t pi = _mm_cvtsi128_si64( p2 );
+    uint64_t data = 0;
+    for( int i=0; i<8; i++ )
+    {
+        uint64_t idx = AlphaIndexTable_SSE[(pi>>(i*8)) & 0x3F];
+        data |= idx << (i*6);
+    }
+    return (uint64_t)(uint16_t)_mm_cvtsi128_si32( minmax ) | ( data << 16 );
+}
+#endif
+
+// Compresses `blocks` 4x4 pixel blocks of a 32-bit-per-pixel image into 64-bit DXT1 blocks.
+//
+// src:    top-left pixel of the first block; image rows are `width` pixels apart.
+// dst:    receives one uint64_t per encoded block.
+// blocks: number of blocks to encode, walked left to right and then down four pixel rows at a time.
+//         Zero is accepted and writes nothing.
+// width:  image width in pixels (presumably a multiple of 4, since rows are consumed width/4 blocks
+//         at a time). When it is a multiple of 8 and AVX2 is enabled, blocks are encoded in pairs.
+void CompressDxt1( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+#ifdef __AVX2__
+    if( width%8 == 0 )
+    {
+        uint32_t buf[8*4];
+        size_t col = 0;
+        char* dst8 = (char*)dst;
+
+        // Pre-tested loop over block pairs: a do/while( --blocks ) would wrap the unsigned counter
+        // and run ~2^32 iterations if the (halved) count were zero.
+        for( uint32_t pairs = blocks / 2; pairs != 0; --pairs )
+        {
+            // Gather an 8x4 pixel tile (two blocks) into a contiguous buffer.
+            auto tmp = (char*)buf;
+            memcpy( tmp,        src + width * 0, 8*4 );
+            memcpy( tmp + 8*4,  src + width * 1, 8*4 );
+            memcpy( tmp + 16*4, src + width * 2, 8*4 );
+            memcpy( tmp + 24*4, src + width * 3, 8*4 );
+            src += 8;
+            if( ++col == width/8 )
+            {
+                // End of a block row: skip the three remaining pixel rows of this row of blocks.
+                src += width * 3;
+                col = 0;
+            }
+
+            // Writes 16 bytes and advances dst8.
+            ProcessRGB_AVX( (uint8_t*)buf, dst8 );
+        }
+    }
+    else
+#endif
+    {
+        uint32_t buf[4*4];
+        size_t col = 0;
+
+        auto ptr = dst;
+        // Pre-tested loop so that blocks == 0 does nothing instead of wrapping the counter.
+        for( ; blocks != 0; --blocks )
+        {
+            // Gather one 4x4 pixel block into a contiguous buffer.
+            auto tmp = (char*)buf;
+            memcpy( tmp,        src + width * 0, 4*4 );
+            memcpy( tmp + 4*4,  src + width * 1, 4*4 );
+            memcpy( tmp + 8*4,  src + width * 2, 4*4 );
+            memcpy( tmp + 12*4, src + width * 3, 4*4 );
+            src += 4;
+            if( ++col == width/4 )
+            {
+                // End of a block row: skip the three remaining pixel rows of this row of blocks.
+                src += width * 3;
+                col = 0;
+            }
+
+            // Encode, then remap the four selector bytes through DxtcIndexTable.
+            const auto c = ProcessRGB( (uint8_t*)buf );
+            uint8_t fix[8];
+            memcpy( fix, &c, 8 );
+            for( int j=4; j<8; j++ ) fix[j] = DxtcIndexTable[fix[j]];
+            memcpy( ptr, fix, sizeof( uint64_t ) );
+            ptr++;
+        }
+    }
+}
+
+// Same as CompressDxt1's scalar path, but runs Dither() over every 4x4 block before encoding it.
+//
+// src:    top-left pixel of the first block; image rows are `width` pixels apart.
+// dst:    receives one uint64_t per encoded block.
+// blocks: number of blocks to encode, walked left to right and then down four pixel rows at a time.
+//         Zero is accepted and writes nothing.
+// width:  image width in pixels (presumably a multiple of 4).
+void CompressDxt1Dither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    uint32_t buf[4*4];
+    size_t col = 0;
+
+    auto ptr = dst;
+    // Pre-tested loop: a do/while( --blocks ) would wrap the unsigned counter for blocks == 0 and
+    // run ~2^32 iterations.
+    for( ; blocks != 0; --blocks )
+    {
+        // Gather one 4x4 pixel block into a contiguous buffer.
+        auto tmp = (char*)buf;
+        memcpy( tmp,        src + width * 0, 4*4 );
+        memcpy( tmp + 4*4,  src + width * 1, 4*4 );
+        memcpy( tmp + 8*4,  src + width * 2, 4*4 );
+        memcpy( tmp + 12*4, src + width * 3, 4*4 );
+        src += 4;
+        if( ++col == width/4 )
+        {
+            // End of a block row: skip the three remaining pixel rows of this row of blocks.
+            src += width * 3;
+            col = 0;
+        }
+
+        // Dither the private copy in place; the source image is left untouched.
+        Dither( (uint8_t*)buf );
+
+        // Encode, then remap the four selector bytes through DxtcIndexTable.
+        const auto c = ProcessRGB( (uint8_t*)buf );
+        uint8_t fix[8];
+        memcpy( fix, &c, 8 );
+        for( int j=4; j<8; j++ ) fix[j] = DxtcIndexTable[fix[j]];
+        memcpy( ptr, fix, sizeof( uint64_t ) );
+        ptr++;
+    }
+}
+
+// Compresses `blocks` 4x4 pixel blocks of a 32-bit-per-pixel image into DXT5.
+//
+// src:    top-left pixel of the first block; image rows are `width` pixels apart. The top byte of
+//         every pixel is treated as alpha.
+// dst:    receives two uint64_t per block: the alpha block followed by the color block.
+// blocks: number of blocks to encode, walked left to right and then down four pixel rows at a time.
+//         Zero is accepted and writes nothing.
+// width:  image width in pixels (presumably a multiple of 4).
+void CompressDxt5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    size_t col = 0;
+    auto ptr = dst;
+    // Pre-tested loop: a do/while( --blocks ) would wrap the unsigned counter for blocks == 0 and
+    // run ~2^32 iterations.
+    for( ; blocks != 0; --blocks )
+    {
+#ifdef __SSE4_1__
+        // Load the four rows of the block straight from the image.
+        __m128i px0 = _mm_loadu_si128( (__m128i*)( src + width * 0 ) );
+        __m128i px1 = _mm_loadu_si128( (__m128i*)( src + width * 1 ) );
+        __m128i px2 = _mm_loadu_si128( (__m128i*)( src + width * 2 ) );
+        __m128i px3 = _mm_loadu_si128( (__m128i*)( src + width * 3 ) );
+
+        src += 4;
+        if( ++col == width/4 )
+        {
+            // End of a block row: skip the three remaining pixel rows of this row of blocks.
+            src += width * 3;
+            col = 0;
+        }
+
+        *ptr++ = ProcessAlpha_SSE( px0, px1, px2, px3 );
+
+        // Encode the color part, then remap the four selector bytes through DxtcIndexTable.
+        const auto c = ProcessRGB_SSE( px0, px1, px2, px3 );
+        uint8_t fix[8];
+        memcpy( fix, &c, 8 );
+        for( int j=4; j<8; j++ ) fix[j] = DxtcIndexTable[fix[j]];
+        memcpy( ptr, fix, sizeof( uint64_t ) );
+        ptr++;
+#else
+        uint32_t rgba[4*4];
+        uint8_t alpha[4*4];
+
+        // Gather one 4x4 pixel block into a contiguous buffer.
+        auto tmp = (char*)rgba;
+        memcpy( tmp,        src + width * 0, 4*4 );
+        memcpy( tmp + 4*4,  src + width * 1, 4*4 );
+        memcpy( tmp + 8*4,  src + width * 2, 4*4 );
+        memcpy( tmp + 12*4, src + width * 3, 4*4 );
+        src += 4;
+        if( ++col == width/4 )
+        {
+            // End of a block row: skip the three remaining pixel rows of this row of blocks.
+            src += width * 3;
+            col = 0;
+        }
+
+        // Split alpha from color. The index is named `k` so it does not shadow the column counter.
+        for( int k=0; k<16; k++ )
+        {
+            alpha[k] = rgba[k] >> 24;
+            rgba[k] &= 0xFFFFFF;
+        }
+        *ptr++ = ProcessAlpha( alpha );
+
+        // Encode the color part, then remap the four selector bytes through DxtcIndexTable.
+        const auto c = ProcessRGB( (uint8_t*)rgba );
+        uint8_t fix[8];
+        memcpy( fix, &c, 8 );
+        for( int j=4; j<8; j++ ) fix[j] = DxtcIndexTable[fix[j]];
+        memcpy( ptr, fix, sizeof( uint64_t ) );
+        ptr++;
+#endif
+    }
+}
diff --git a/thirdparty/etcpak/ProcessDxtc.hpp b/thirdparty/etcpak/ProcessDxtc.hpp
new file mode 100644
index 0000000000..8e0b12e4bd
--- /dev/null
+++ b/thirdparty/etcpak/ProcessDxtc.hpp
@@ -0,0 +1,11 @@
+#ifndef __PROCESSDXT1_HPP__
+#define __PROCESSDXT1_HPP__
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Block-compression entry points. In all three, `src` is the top-left pixel of a 32-bit-per-pixel image
+// whose rows are `width` pixels apart, and `blocks` 4x4 blocks are encoded left to right, top to bottom.
+
+// Writes one 64-bit DXT1 block per input block to `dst`.
+void CompressDxt1( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+// Like CompressDxt1, but dithers a private copy of each block before encoding it.
+void CompressDxt1Dither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+// Writes two 64-bit words per input block to `dst`: the alpha block followed by the color block.
+void CompressDxt5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
+
+#endif
diff --git a/thirdparty/etcpak/ProcessRGB.cpp b/thirdparty/etcpak/ProcessRGB.cpp
new file mode 100644
index 0000000000..7f4524d105
--- /dev/null
+++ b/thirdparty/etcpak/ProcessRGB.cpp
@@ -0,0 +1,3100 @@
+#include <array>
+#include <string.h>
+#include <limits>
+
+#ifdef __ARM_NEON
+#  include <arm_neon.h>
+#endif
+
+#include "Dither.hpp"
+#include "ForceInline.hpp"
+#include "Math.hpp"
+#include "ProcessCommon.hpp"
+#include "ProcessRGB.hpp"
+#include "Tables.hpp"
+#include "Vector.hpp"
+#if defined __SSE4_1__ || defined __AVX2__ || defined _MSC_VER
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#    include <Windows.h>
+#    define _bswap(x) _byteswap_ulong(x)
+#    define _bswap64(x) _byteswap_uint64(x)
+#  else
+#    include <x86intrin.h>
+#  endif
+#endif
+
+#ifndef _bswap
+#  define _bswap(x) __builtin_bswap32(x)
+#  define _bswap64(x) __builtin_bswap64(x)
+#endif
+
+namespace
+{
+
+#if defined _MSC_VER && !defined __clang__
+// MSVC stand-in for the _bit_scan_forward intrinsic: hands back the index that _BitScanForward
+// stores for `mask`. The status returned by _BitScanForward is ignored, so callers are presumably
+// expected to pass a non-zero mask.
+static etcpak_force_inline unsigned long _bit_scan_forward( unsigned long mask )
+{
+    unsigned long index;
+    (void)_BitScanForward( &index, mask );
+    return index;
+}
+#endif
+
+typedef std::array<uint16_t, 4> v4i;
+
+#ifdef __AVX2__
+// Sums the color channels of a 16-pixel block (64 bytes at `data`, 4 bytes per pixel) over four
+// 8-pixel halves.
+//
+// Each 64-bit lane of the result holds four 16-bit channel sums (the fourth is zero because the top
+// byte of every pixel is masked off). With pixels numbered 0..15 in memory order:
+//   lane 0 = pixels 8..15, lane 1 = pixels 0..7,
+//   lane 2 = pixels 2,3,6,7,10,11,14,15, lane 3 = pixels 0,1,4,5,8,9,12,13.
+static etcpak_force_inline __m256i Sum4_AVX2( const uint8_t* data) noexcept
+{
+    // Four groups of four pixels each.
+    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
+    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
+
+    // Drop the top byte of every pixel so it does not contribute to the sums.
+    __m128i dm0 = _mm_and_si128(d0, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm1 = _mm_and_si128(d1, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm2 = _mm_and_si128(d2, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm3 = _mm_and_si128(d3, _mm_set1_epi32(0x00FFFFFF));
+
+    // Widen every byte to 16 bits: one pixel per 64-bit lane.
+    __m256i t0 = _mm256_cvtepu8_epi16(dm0);
+    __m256i t1 = _mm256_cvtepu8_epi16(dm1);
+    __m256i t2 = _mm256_cvtepu8_epi16(dm2);
+    __m256i t3 = _mm256_cvtepu8_epi16(dm3);
+
+    // Pairwise sums: lane k of sum0 = pixel k + pixel k+4, lane k of sum1 = pixel k+8 + pixel k+12.
+    __m256i sum0 = _mm256_add_epi16(t0, t1);
+    __m256i sum1 = _mm256_add_epi16(t2, t3);
+
+    __m256i s0 = _mm256_permute2x128_si256(sum0, sum1, (0) | (3 << 4)); // 0, 0, 3, 3
+    __m256i s1 = _mm256_permute2x128_si256(sum0, sum1, (1) | (2 << 4)); // 1, 1, 2, 2
+
+    // Rearrange the 64-bit lanes so that the two additions below combine the pair sums into the
+    // four 8-pixel totals described above.
+    __m256i s2 = _mm256_permute4x64_epi64(s0, _MM_SHUFFLE(1, 3, 0, 2));
+    __m256i s3 = _mm256_permute4x64_epi64(s0, _MM_SHUFFLE(0, 2, 1, 3));
+    __m256i s4 = _mm256_permute4x64_epi64(s1, _MM_SHUFFLE(3, 1, 0, 2));
+    __m256i s5 = _mm256_permute4x64_epi64(s1, _MM_SHUFFLE(2, 0, 1, 3));
+
+    __m256i sum5 = _mm256_add_epi16(s2, s3); //   3,   0,   3,   0
+    __m256i sum6 = _mm256_add_epi16(s4, s5); //   2,   1,   1,   2
+    return _mm256_add_epi16(sum5, sum6);     // 3+2, 0+1, 3+1, 3+2
+}
+
+// Rounded division by 8 of every 16-bit lane: ( value + 4 ) >> 3.
+// PrepareAverages_AVX2 uses it to turn the 8-pixel channel sums from Sum4_AVX2 into averages.
+static etcpak_force_inline __m256i Average_AVX2( const __m256i data) noexcept
+{
+    const __m256i roundingBias = _mm256_set1_epi16(4);
+    return _mm256_srli_epi16(_mm256_add_epi16(data, roundingBias), 3);
+}
+
+// Computes an error term for each of the eight quantized averages in `a` against the channel sums
+// in `data`, and returns them added up in pairs.
+//
+// data: four 64-bit lanes of 16-bit channel sums (as produced by Sum4_AVX2).
+// a:    eight averages; read with aligned 256-bit loads, so the array must be 32-byte aligned.
+//
+// Per average the term is 8 * |average|^2 - 2 * dot( average, sum ) + 0x3FFFFFFF.
+// Returns four 32-bit values: the terms of a[0]+a[1], a[2]+a[3], a[4]+a[5] and a[6]+a[7].
+static etcpak_force_inline __m128i CalcErrorBlock_AVX2( const __m256i data, const v4i a[8]) noexcept
+{
+    // a0 = a[0..3], a1 = a[4..7]
+    __m256i a0 = _mm256_load_si256((__m256i*)a[0].data());
+    __m256i a1 = _mm256_load_si256((__m256i*)a[4].data());
+
+    // err = 8 * ( sq( average[0] ) + sq( average[1] ) + sq( average[2] ) );
+    __m256i a4 = _mm256_madd_epi16(a0, a0);
+    __m256i a5 = _mm256_madd_epi16(a1, a1);
+
+    // hadd finishes the per-average sum of squares; the shift multiplies it by 8.
+    __m256i a6 = _mm256_hadd_epi32(a4, a5);
+    __m256i a7 = _mm256_slli_epi32(a6, 3);
+
+    __m256i a8 = _mm256_add_epi32(a7, _mm256_set1_epi32(0x3FFFFFFF)); // Big value to prevent negative values, but small enough to prevent overflow
+
+    // average is not swapped
+    // err -= block[0] * 2 * average[0];
+    // err -= block[1] * 2 * average[1];
+    // err -= block[2] * 2 * average[2];
+    __m256i a2 = _mm256_slli_epi16(a0, 1);
+    __m256i a3 = _mm256_slli_epi16(a1, 1);
+    __m256i b0 = _mm256_madd_epi16(a2, data);
+    __m256i b1 = _mm256_madd_epi16(a3, data);
+
+    // b3 holds one term per average; the second hadd adds neighbouring averages together.
+    __m256i b2 = _mm256_hadd_epi32(b0, b1);
+    __m256i b3 = _mm256_sub_epi32(a8, b2);
+    __m256i b4 = _mm256_hadd_epi32(b3, b3);
+
+    // Collect the four pair sums (elements 0, 4, 1, 5 of b4) into the low 128 bits.
+    __m256i b5 = _mm256_permutevar8x32_epi32(b4, _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0));
+
+    return _mm256_castsi256_si128(b5);
+}
+
+// Quantizes the four 8-bit channel averages in `d` in two ways and stores both sets in `a`.
+//
+// d: four 64-bit lanes of 16-bit channel averages.
+// a: output, written with aligned 256-bit stores (must be 32-byte aligned):
+//    a[4..7] = 5-bit quantization, where the first average of each 128-bit pair is kept within
+//              -4..+3 steps of the second one; expanded back to 8 bits.
+//    a[0..3] = 4-bit quantization expanded back to 8 bits.
+static etcpak_force_inline void ProcessAverages_AVX2(const __m256i d, v4i a[8] ) noexcept
+{
+    // Rounded scaling of 0..255 to 0..31: ( t + ( t >> 8 ) ) >> 8 with t = d * 31 + 128.
+    __m256i t = _mm256_add_epi16(_mm256_mullo_epi16(d, _mm256_set1_epi16(31)), _mm256_set1_epi16(128));
+
+    __m256i c = _mm256_srli_epi16(_mm256_add_epi16(t, _mm256_srli_epi16(t, 8)), 8);
+
+    // c1: the upper 64 bits of every 128-bit lane copied into both halves (the pair's second average).
+    __m256i c1 = _mm256_shuffle_epi32(c, _MM_SHUFFLE(3, 2, 3, 2));
+    // Clamp the difference to what a 3-bit signed delta can hold.
+    __m256i diff = _mm256_sub_epi16(c, c1);
+    diff = _mm256_max_epi16(diff, _mm256_set1_epi16(-4));
+    diff = _mm256_min_epi16(diff, _mm256_set1_epi16(3));
+
+    __m256i co = _mm256_add_epi16(c1, diff);
+
+    // Lower half of each lane takes the clamped value, upper half keeps the unmodified one.
+    c = _mm256_blend_epi16(co, c, 0xF0);
+
+    // Expand 5 bits to 8 by replicating the top bits into the low bits.
+    __m256i a0 = _mm256_or_si256(_mm256_slli_epi16(c, 3), _mm256_srli_epi16(c, 2));
+
+    _mm256_store_si256((__m256i*)a[4].data(), a0);
+
+    // Rounded scaling of 0..255 to 0..15, same scheme with a factor of 15.
+    __m256i t0 = _mm256_add_epi16(_mm256_mullo_epi16(d, _mm256_set1_epi16(15)), _mm256_set1_epi16(128));
+    __m256i t1 = _mm256_srli_epi16(_mm256_add_epi16(t0, _mm256_srli_epi16(t0, 8)), 8);
+
+    // Expand 4 bits to 8 by duplicating the nibble.
+    __m256i t2 = _mm256_or_si256(t1, _mm256_slli_epi16(t1, 4));
+
+    _mm256_store_si256((__m256i*)a[0].data(), t2);
+}
+
+// Packs the pair of averages selected by `idx` into the low 32 bits of a block word.
+//
+// a:   eight averages as written by ProcessAverages_AVX2; a[2*idx] and a[2*idx+1] are read with one
+//      aligned 128-bit load.
+// idx: 0..3; stored in bits 24.. of the result. Bit 1 selects the packing:
+//      clear -> two 4-bit values per channel byte (first average in the low nibble, second in the high),
+//      set   -> 5-bit value of the second average plus a 3-bit signed delta to the first.
+//      NOTE(review): this looks like ETC1's individual / differential base-color layouts - confirm.
+// Returns the mode bits OR'ed with three packed channel bytes.
+static etcpak_force_inline uint64_t EncodeAverages_AVX2( const v4i a[8], size_t idx ) noexcept
+{
+    uint64_t d = ( idx << 24 );
+    size_t base = idx << 1;
+
+    __m128i a0 = _mm_load_si128((const __m128i*)a[base].data());
+
+    __m128i r0, r1;
+
+    if( ( idx & 0x2 ) == 0 )
+    {
+        // Keep the top 4 bits of every channel.
+        r0 = _mm_srli_epi16(a0, 4);
+
+        // Second average moved into the high nibble.
+        __m128i a1 = _mm_unpackhi_epi64(r0, r0);
+        r1 = _mm_slli_epi16(a1, 4);
+    }
+    else
+    {
+        // Keep the top 5 bits of every channel (still in place, i.e. bits 3..7).
+        __m128i a1 = _mm_and_si128(a0, _mm_set1_epi16(-8));
+
+        // r0 = second average in both halves; delta = ( first - second ) in 5-bit steps, 3 bits kept.
+        r0 = _mm_unpackhi_epi64(a1, a1);
+        __m128i a2 = _mm_sub_epi16(a1, r0);
+        __m128i a3 = _mm_srai_epi16(a2, 3);
+        r1 = _mm_and_si128(a3, _mm_set1_epi16(0x07));
+    }
+
+    __m128i r2 = _mm_or_si128(r0, r1);
+    // do missing swap for average values
+    __m128i r3 = _mm_shufflelo_epi16(r2, _MM_SHUFFLE(3, 0, 1, 2));
+    // Narrow to bytes and take the low 32 bits.
+    __m128i r4 = _mm_packus_epi16(r3, _mm_setzero_si128());
+    d |= _mm_cvtsi128_si32(r4);
+
+    return d;
+}
+
+// Detects a block whose 16 pixels (64 bytes at `src`) are all byte-for-byte identical.
+//
+// Returns 0 when the pixels differ. Otherwise returns 0x02000000 combined with the top five bits of
+// the first three bytes of the first pixel (byte 0 in bits 19..23, byte 1 in bits 11..15, byte 2 in
+// bits 3..7).
+static etcpak_force_inline uint64_t CheckSolid_AVX2( const uint8_t* src ) noexcept
+{
+    const __m256i lowerHalf = _mm256_loadu_si256(((__m256i*)src) + 0);
+    const __m256i upperHalf = _mm256_loadu_si256(((__m256i*)src) + 1);
+
+    // Compare every pixel with a broadcast copy of the first one.
+    const __m256i firstPixel = _mm256_broadcastd_epi32(_mm256_castsi256_si128(lowerHalf));
+    const __m256i equalMask = _mm256_and_si256(
+        _mm256_cmpeq_epi8(lowerHalf, firstPixel),
+        _mm256_cmpeq_epi8(upperHalf, firstPixel));
+
+    // testc against all ones is non-zero only when every compare byte is 0xFF.
+    const bool isSolid = _mm256_testc_si256(equalMask, _mm256_set1_epi32(-1)) != 0;
+    if (isSolid)
+    {
+        return 0x02000000 |
+            ( (unsigned int)( src[0] & 0xF8 ) << 16 ) |
+            ( (unsigned int)( src[1] & 0xF8 ) << 8 ) |
+            ( (unsigned int)( src[2] & 0xF8 ) );
+    }
+
+    return 0;
+}
+
+// Fills `a` with the quantized averages derived from the precomputed channel sums `sum4` and
+// returns the error terms of those averages (see CalcErrorBlock_AVX2).
+static etcpak_force_inline __m128i PrepareAverages_AVX2( v4i a[8], const __m256i sum4) noexcept
+{
+    const __m256i averages = Average_AVX2( sum4 );
+    ProcessAverages_AVX2( averages, a );
+    return CalcErrorBlock_AVX2( sum4, a);
+}
+
+// Convenience overload: computes the channel sums from the 16 pixels at `src` first.
+static etcpak_force_inline __m128i PrepareAverages_AVX2( v4i a[8], const uint8_t* src) noexcept
+{
+    const __m256i sum4 = Sum4_AVX2( src );
+    return PrepareAverages_AVX2( a, sum4 );
+}
+
+// Evaluates, for both 8-pixel halves of a block, how well each modifier-table candidate fits and which
+// selector every pixel would get.
+//
+// terr:   output; terr[1 - j] receives eight 32-bit squared-error sums for half j (aligned 256-bit
+//         store, so the array must be 32-byte aligned). Presumably one sum per table candidate.
+// tsel:   output; eight 32-bit words of packed selector bits (aligned 256-bit store).
+// a:      averages; half j is compared against a[offset + 1 - j].
+// offset: index of the average pair to use.
+// data:   16 pixels, 4 bytes each; half 0 is pixels 0..7 and half 1 is pixels 8..15.
+static etcpak_force_inline void FindBestFit_4x2_AVX2( uint32_t terr[2][8], uint32_t tsel[8], v4i a[8], const uint32_t offset, const uint8_t* data) noexcept
+{
+    // sel0 collects the "which magnitude" bit, sel1 the sign bit of every pixel's selector.
+    __m256i sel0 = _mm256_setzero_si256();
+    __m256i sel1 = _mm256_setzero_si256();
+
+    for (unsigned int j = 0; j < 2; ++j)
+    {
+        unsigned int bid = offset + 1 - j;
+
+        __m256i squareErrorSum = _mm256_setzero_si256();
+
+        // Broadcast this half's average to all four 64-bit lanes (one per pixel).
+        __m128i a0 = _mm_loadl_epi64((const __m128i*)a[bid].data());
+        __m256i a1 = _mm256_broadcastq_epi64(a0);
+
+        // Processing one full row each iteration
+        for (size_t i = 0; i < 8; i += 4)
+        {
+            __m128i rgb = _mm_loadu_si128((const __m128i*)(data + i * 4));
+
+            // Per-channel difference between the average and each of the four pixels.
+            __m256i rgb16 = _mm256_cvtepu8_epi16(rgb);
+            __m256i d = _mm256_sub_epi16(a1, rgb16);
+
+            // The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
+            // This produces slightly different results, but is significantly faster
+            __m256i pixel0 = _mm256_madd_epi16(d, _mm256_set_epi16(0, 38, 76, 14, 0, 38, 76, 14, 0, 38, 76, 14, 0, 38, 76, 14));
+            __m256i pixel1 = _mm256_packs_epi32(pixel0, pixel0);
+            __m256i pixel2 = _mm256_hadd_epi16(pixel1, pixel1);
+            __m128i pixel3 = _mm256_castsi256_si128(pixel2);
+
+            // Broadcast the weighted difference of pixel 0 to the low lane and of pixel 1 to the high lane.
+            __m128i pix0 = _mm_broadcastw_epi16(pixel3);
+            __m128i pix1 = _mm_broadcastw_epi16(_mm_srli_epi32(pixel3, 16));
+            __m256i pixel = _mm256_insertf128_si256(_mm256_castsi128_si256(pix0), pix1, 1);
+
+            // Processing first two pixels of the row
+            {
+                __m256i pix = _mm256_abs_epi16(pixel);
+
+                // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+                // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+                __m256i error0 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[0])));
+                __m256i error1 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[1])));
+
+                // 1 where the second table row is the closer one.
+                __m256i minIndex0 = _mm256_and_si256(_mm256_cmpgt_epi16(error0, error1), _mm256_set1_epi16(1));
+                __m256i minError = _mm256_min_epi16(error0, error1);
+
+                // Exploiting symmetry of the selector table and use the sign bit
+                // This produces slightly different results, but is significantly faster
+                __m256i minIndex1 = _mm256_srli_epi16(pixel, 15);
+
+                // Interleaving values so madd instruction can be used
+                __m256i minErrorLo = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(1, 1, 0, 0));
+                __m256i minErrorHi = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(3, 3, 2, 2));
+
+                __m256i minError2 = _mm256_unpacklo_epi16(minErrorLo, minErrorHi);
+                // Squaring the minimum error to produce correct values when adding
+                __m256i squareError = _mm256_madd_epi16(minError2, minError2);
+
+                squareErrorSum = _mm256_add_epi32(squareErrorSum, squareError);
+
+                // Packing selector bits
+                __m256i minIndexLo2 = _mm256_sll_epi16(minIndex0, _mm_cvtsi64_si128(i + j * 8));
+                __m256i minIndexHi2 = _mm256_sll_epi16(minIndex1, _mm_cvtsi64_si128(i + j * 8));
+
+                sel0 = _mm256_or_si256(sel0, minIndexLo2);
+                sel1 = _mm256_or_si256(sel1, minIndexHi2);
+            }
+
+            // Same broadcast for pixels 2 and 3, taken from the upper 128 bits.
+            pixel3 = _mm256_extracti128_si256(pixel2, 1);
+            pix0 = _mm_broadcastw_epi16(pixel3);
+            pix1 = _mm_broadcastw_epi16(_mm_srli_epi32(pixel3, 16));
+            pixel = _mm256_insertf128_si256(_mm256_castsi128_si256(pix0), pix1, 1);
+
+            // Processing second two pixels of the row
+            {
+                __m256i pix = _mm256_abs_epi16(pixel);
+
+                // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+                // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+                __m256i error0 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[0])));
+                __m256i error1 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[1])));
+
+                // 1 where the second table row is the closer one.
+                __m256i minIndex0 = _mm256_and_si256(_mm256_cmpgt_epi16(error0, error1), _mm256_set1_epi16(1));
+                __m256i minError = _mm256_min_epi16(error0, error1);
+
+                // Exploiting symmetry of the selector table and use the sign bit
+                __m256i minIndex1 = _mm256_srli_epi16(pixel, 15);
+
+                // Interleaving values so madd instruction can be used
+                __m256i minErrorLo = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(1, 1, 0, 0));
+                __m256i minErrorHi = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(3, 3, 2, 2));
+
+                __m256i minError2 = _mm256_unpacklo_epi16(minErrorLo, minErrorHi);
+                // Squaring the minimum error to produce correct values when adding
+                __m256i squareError = _mm256_madd_epi16(minError2, minError2);
+
+                squareErrorSum = _mm256_add_epi32(squareErrorSum, squareError);
+
+                // Packing selector bits
+                // (two positions further along than the first pixel pair, hence the extra shift by 2)
+                __m256i minIndexLo2 = _mm256_sll_epi16(minIndex0, _mm_cvtsi64_si128(i + j * 8));
+                __m256i minIndexHi2 = _mm256_sll_epi16(minIndex1, _mm_cvtsi64_si128(i + j * 8));
+                __m256i minIndexLo3 = _mm256_slli_epi16(minIndexLo2, 2);
+                __m256i minIndexHi3 = _mm256_slli_epi16(minIndexHi2, 2);
+
+                sel0 = _mm256_or_si256(sel0, minIndexLo3);
+                sel1 = _mm256_or_si256(sel1, minIndexHi3);
+            }
+        }
+
+        // Advance to the next 8 pixels.
+        data += 8 * 4;
+
+        _mm256_store_si256((__m256i*)terr[1 - j], squareErrorSum);
+    }
+
+    // Interleave selector bits
+    __m256i minIndexLo0 = _mm256_unpacklo_epi16(sel0, sel1);
+    __m256i minIndexHi0 = _mm256_unpackhi_epi16(sel0, sel1);
+
+    __m256i minIndexLo1 = _mm256_permute2x128_si256(minIndexLo0, minIndexHi0, (0) | (2 << 4));
+    __m256i minIndexHi1 = _mm256_permute2x128_si256(minIndexLo0, minIndexHi0, (1) | (3 << 4));
+
+    __m256i minIndexHi2 = _mm256_slli_epi32(minIndexHi1, 1);
+
+    __m256i sel = _mm256_or_si256(minIndexLo1, minIndexHi2);
+
+    _mm256_store_si256((__m256i*)tsel, sel);
+}
+
+// Scores the selector tables in g_table128_SIMD against the 16 four-byte pixels at `data`,
+// using the pair of averages a[offset] / a[offset + 1].
+//
+// terr   - output: two rows of eight 32-bit squared-error sums (one sum per table lane).
+//          Written with aligned 256-bit stores, so the array must be 32-byte aligned.
+// tsel   - output: eight 32-bit words of packed selector bits (aligned 256-bit store as well).
+// a      - array of averages; only a[offset] and a[offset + 1] are read (four 16-bit values each).
+// offset - index of the first average of the pair.
+// data   - 64 bytes of pixel data, read as four unaligned 16-byte loads.
+static etcpak_force_inline void FindBestFit_2x4_AVX2( uint32_t terr[2][8], uint32_t tsel[8], v4i a[8], const uint32_t offset, const uint8_t* data) noexcept
+{
+    __m256i sel0 = _mm256_setzero_si256();
+    __m256i sel1 = _mm256_setzero_si256();
+
+    __m256i squareErrorSum0 = _mm256_setzero_si256();
+    __m256i squareErrorSum1 = _mm256_setzero_si256();
+
+    __m128i a0 = _mm_loadl_epi64((const __m128i*)a[offset + 1].data());
+    __m128i a1 = _mm_loadl_epi64((const __m128i*)a[offset + 0].data());
+
+    // Low 128-bit lane: a[offset + 1] twice (compared with pixels 0-1 of each 16-byte group),
+    // high lane: a[offset + 0] twice (compared with pixels 2-3).
+    __m128i a2 = _mm_broadcastq_epi64(a0);
+    __m128i a3 = _mm_broadcastq_epi64(a1);
+    __m256i a4 = _mm256_insertf128_si256(_mm256_castsi128_si256(a2), a3, 1);
+
+    // Processing one full row each iteration
+    for (size_t i = 0; i < 16; i += 4)
+    {
+        __m128i rgb = _mm_loadu_si128((const __m128i*)(data + i * 4));
+
+        __m256i rgb16 = _mm256_cvtepu8_epi16(rgb);
+        __m256i d = _mm256_sub_epi16(a4, rgb16);
+
+        // The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
+        // This produces slightly different results, but is significant faster
+        // Weights 14 / 76 / 38 / 0 are applied to 16-bit elements 0 / 1 / 2 / 3 of each pixel difference;
+        // the pack + horizontal add below collapse them to one signed 16-bit value per pixel.
+        __m256i pixel0 = _mm256_madd_epi16(d, _mm256_set_epi16(0, 38, 76, 14, 0, 38, 76, 14, 0, 38, 76, 14, 0, 38, 76, 14));
+        __m256i pixel1 = _mm256_packs_epi32(pixel0, pixel0);
+        __m256i pixel2 = _mm256_hadd_epi16(pixel1, pixel1);
+        __m128i pixel3 = _mm256_castsi256_si128(pixel2);
+
+        // Broadcast pixel 0 across the low lane and pixel 1 across the high lane
+        __m128i pix0 = _mm_broadcastw_epi16(pixel3);
+        __m128i pix1 = _mm_broadcastw_epi16(_mm_srli_epi32(pixel3, 16));
+        __m256i pixel = _mm256_insertf128_si256(_mm256_castsi128_si256(pix0), pix1, 1);
+
+        // Processing first two pixels of the row
+        {
+            __m256i pix = _mm256_abs_epi16(pixel);
+
+            // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+            // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+            __m256i error0 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[0])));
+            __m256i error1 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[1])));
+
+            // Selector bit 0: set where the g_table128_SIMD[1] entry is strictly closer
+            __m256i minIndex0 = _mm256_and_si256(_mm256_cmpgt_epi16(error0, error1), _mm256_set1_epi16(1));
+            __m256i minError = _mm256_min_epi16(error0, error1);
+
+            // Exploiting symmetry of the selector table and use the sign bit
+            __m256i minIndex1 = _mm256_srli_epi16(pixel, 15);
+
+            // Interleaving values so madd instruction can be used
+            __m256i minErrorLo = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(1, 1, 0, 0));
+            __m256i minErrorHi = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(3, 3, 2, 2));
+
+            __m256i minError2 = _mm256_unpacklo_epi16(minErrorLo, minErrorHi);
+            // Squaring the minimum error to produce correct values when adding
+            // (each 32-bit result is the sum of the two pixels' squared errors for one table lane)
+            __m256i squareError = _mm256_madd_epi16(minError2, minError2);
+
+            squareErrorSum0 = _mm256_add_epi32(squareErrorSum0, squareError);
+
+            // Packing selector bits
+            // (shift count is the loop counter i = 0, 4, 8, 12)
+            __m256i minIndexLo2 = _mm256_sll_epi16(minIndex0, _mm_cvtsi64_si128(i));
+            __m256i minIndexHi2 = _mm256_sll_epi16(minIndex1, _mm_cvtsi64_si128(i));
+
+            sel0 = _mm256_or_si256(sel0, minIndexLo2);
+            sel1 = _mm256_or_si256(sel1, minIndexHi2);
+        }
+
+        // Same broadcast as above, now for pixels 2 and 3 (upper 128-bit lane of pixel2)
+        pixel3 = _mm256_extracti128_si256(pixel2, 1);
+        pix0 = _mm_broadcastw_epi16(pixel3);
+        pix1 = _mm_broadcastw_epi16(_mm_srli_epi32(pixel3, 16));
+        pixel = _mm256_insertf128_si256(_mm256_castsi128_si256(pix0), pix1, 1);
+
+        // Processing second two pixels of the row
+        {
+            __m256i pix = _mm256_abs_epi16(pixel);
+
+            // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+            // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+            __m256i error0 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[0])));
+            __m256i error1 = _mm256_abs_epi16(_mm256_sub_epi16(pix, _mm256_broadcastsi128_si256(g_table128_SIMD[1])));
+
+            __m256i minIndex0 = _mm256_and_si256(_mm256_cmpgt_epi16(error0, error1), _mm256_set1_epi16(1));
+            __m256i minError = _mm256_min_epi16(error0, error1);
+
+            // Exploiting symmetry of the selector table and use the sign bit
+            __m256i minIndex1 = _mm256_srli_epi16(pixel, 15);
+
+            // Interleaving values so madd instruction can be used
+            __m256i minErrorLo = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(1, 1, 0, 0));
+            __m256i minErrorHi = _mm256_permute4x64_epi64(minError, _MM_SHUFFLE(3, 3, 2, 2));
+
+            __m256i minError2 = _mm256_unpacklo_epi16(minErrorLo, minErrorHi);
+            // Squaring the minimum error to produce correct values when adding
+            __m256i squareError = _mm256_madd_epi16(minError2, minError2);
+
+            squareErrorSum1 = _mm256_add_epi32(squareErrorSum1, squareError);
+
+            // Packing selector bits
+            // (two positions above the bits written for the first pixel pair)
+            __m256i minIndexLo2 = _mm256_sll_epi16(minIndex0, _mm_cvtsi64_si128(i));
+            __m256i minIndexHi2 = _mm256_sll_epi16(minIndex1, _mm_cvtsi64_si128(i));
+            __m256i minIndexLo3 = _mm256_slli_epi16(minIndexLo2, 2);
+            __m256i minIndexHi3 = _mm256_slli_epi16(minIndexHi2, 2);
+
+            sel0 = _mm256_or_si256(sel0, minIndexLo3);
+            sel1 = _mm256_or_si256(sel1, minIndexHi3);
+        }
+    }
+
+    // Note the swap: the sums of the first pixel pairs go to terr[1], those of the second pairs to terr[0]
+    _mm256_store_si256((__m256i*)terr[1], squareErrorSum0);
+    _mm256_store_si256((__m256i*)terr[0], squareErrorSum1);
+
+    // Interleave selector bits
+    __m256i minIndexLo0 = _mm256_unpacklo_epi16(sel0, sel1);
+    __m256i minIndexHi0 = _mm256_unpackhi_epi16(sel0, sel1);
+
+    __m256i minIndexLo1 = _mm256_permute2x128_si256(minIndexLo0, minIndexHi0, (0) | (2 << 4));
+    __m256i minIndexHi1 = _mm256_permute2x128_si256(minIndexLo0, minIndexHi0, (1) | (3 << 4));
+
+    __m256i minIndexHi2 = _mm256_slli_epi32(minIndexHi1, 1);
+
+    __m256i sel = _mm256_or_si256(minIndexLo1, minIndexHi2);
+
+    _mm256_store_si256((__m256i*)tsel, sel);
+}
+
+// Picks the lowest-error table index for each of the two error rows and merges the matching
+// selector bits into the block word.
+//
+// d      - partially built block word; the chosen indices are OR-ed in at bits 26 and 29.
+// terr   - two rows of eight 32-bit errors; read with aligned 256-bit loads (32-byte alignment required).
+// tsel   - eight 32-bit selector words, indexed by the chosen table index.
+// rotate - selects which bit masks split the selector word between the two rows.
+// Returns d with the byte-swapped selector word placed in the upper 32 bits.
+static etcpak_force_inline uint64_t EncodeSelectors_AVX2( uint64_t d, const uint32_t terr[2][8], const uint32_t tsel[8], const bool rotate) noexcept
+{
+    size_t tidx[2];
+
+    // Get index of minimum error (terr[0] and terr[1])
+    __m256i err0 = _mm256_load_si256((const __m256i*)terr[0]);
+    __m256i err1 = _mm256_load_si256((const __m256i*)terr[1]);
+
+    // Regroup so the low lane reduces terr[0] and the high lane reduces terr[1]
+    __m256i errLo = _mm256_permute2x128_si256(err0, err1, (0) | (2 << 4));
+    __m256i errHi = _mm256_permute2x128_si256(err0, err1, (1) | (3 << 4));
+
+    __m256i errMin0 = _mm256_min_epu32(errLo, errHi);
+
+    // Horizontal minimum inside each 128-bit lane
+    __m256i errMin1 = _mm256_shuffle_epi32(errMin0, _MM_SHUFFLE(2, 3, 0, 1));
+    __m256i errMin2 = _mm256_min_epu32(errMin0, errMin1);
+
+    __m256i errMin3 = _mm256_shuffle_epi32(errMin2, _MM_SHUFFLE(1, 0, 3, 2));
+    __m256i errMin4 = _mm256_min_epu32(errMin3, errMin2);
+
+    // Broadcast each row's minimum to all eight elements
+    __m256i errMin5 = _mm256_permute2x128_si256(errMin4, errMin4, (0) | (0 << 4));
+    __m256i errMin6 = _mm256_permute2x128_si256(errMin4, errMin4, (1) | (1 << 4));
+
+    __m256i errMask0 = _mm256_cmpeq_epi32(errMin5, err0);
+    __m256i errMask1 = _mm256_cmpeq_epi32(errMin6, err1);
+
+    uint32_t mask0 = _mm256_movemask_epi8(errMask0);
+    uint32_t mask1 = _mm256_movemask_epi8(errMask1);
+
+    // movemask yields four bits per 32-bit element, so bit position / 4 is the index of the
+    // first (lowest-index) entry equal to the minimum
+    tidx[0] = _bit_scan_forward(mask0) >> 2;
+    tidx[1] = _bit_scan_forward(mask1) >> 2;
+
+    d |= tidx[0] << 26;
+    d |= tidx[1] << 29;
+
+    unsigned int t0 = tsel[tidx[0]];
+    unsigned int t1 = tsel[tidx[1]];
+
+    // Keep complementary bit sets of the two selector words so they can be OR-ed together
+    if (!rotate)
+    {
+        t0 &= 0xFF00FF00;
+        t1 &= 0x00FF00FF;
+    }
+    else
+    {
+        t0 &= 0xCCCCCCCC;
+        t1 &= 0x33333333;
+    }
+
+    // Flip selectors from sign bit
+    unsigned int t2 = (t0 | t1) ^ 0xFFFF0000;
+
+    return d | static_cast<uint64_t>(_bswap(t2)) << 32;
+}
+
+// Quantises three float colour vectors (cof, chf, cvf) and returns them as packed bytes.
+// Even 16-bit elements take the ">> 7 ... >> 3" path and odd elements the ">> 8 ... >> 2" path;
+// presumably these yield the 6-bit (r, b) and 7-bit (g) values the function name refers to.
+// The result holds cof in bytes 4-6, chf in bytes 0-2 and cvf in bytes 8-10, each with its
+// first three elements in reversed byte order; all remaining bytes are zero.
+static etcpak_force_inline __m128i r6g7b6_AVX2(__m128 cof, __m128 chf, __m128 cvf) noexcept
+{
+    // Float -> int32 with truncation
+    __m128i co = _mm_cvttps_epi32(cof);
+    __m128i ch = _mm_cvttps_epi32(chf);
+    __m128i cv = _mm_cvttps_epi32(cvf);
+
+    // Saturating pack to unsigned 16 bit (negative inputs become 0)
+    __m128i coh = _mm_packus_epi32(co, ch);
+    __m128i cv0 = _mm_packus_epi32(cv, _mm_setzero_si128());
+
+    __m256i cohv0 = _mm256_inserti128_si256(_mm256_castsi128_si256(coh), cv0, 1);
+    __m256i cohv1 = _mm256_min_epu16(cohv0, _mm256_set1_epi16(1023));
+
+    __m256i cohv2 = _mm256_sub_epi16(cohv1, _mm256_set1_epi16(15));
+    __m256i cohv3 = _mm256_srai_epi16(cohv2, 1);
+
+    __m256i cohvrb0 = _mm256_add_epi16(cohv3, _mm256_set1_epi16(11));
+    __m256i cohvrb1 = _mm256_add_epi16(cohv3, _mm256_set1_epi16(4));
+    __m256i cohvg0 = _mm256_add_epi16(cohv3, _mm256_set1_epi16(9));
+    __m256i cohvg1 = _mm256_add_epi16(cohv3, _mm256_set1_epi16(6));
+
+    __m256i cohvrb2 = _mm256_srai_epi16(cohvrb0, 7);
+    __m256i cohvrb3 = _mm256_srai_epi16(cohvrb1, 7);
+    __m256i cohvg2 = _mm256_srai_epi16(cohvg0, 8);
+    __m256i cohvg3 = _mm256_srai_epi16(cohvg1, 8);
+
+    __m256i cohvrb4 = _mm256_sub_epi16(cohvrb0, cohvrb2);
+    __m256i cohvrb5 = _mm256_sub_epi16(cohvrb4, cohvrb3);
+    __m256i cohvg4 = _mm256_sub_epi16(cohvg0, cohvg2);
+    __m256i cohvg5 = _mm256_sub_epi16(cohvg4, cohvg3);
+
+    __m256i cohvrb6 = _mm256_srai_epi16(cohvrb5, 3);
+    __m256i cohvg6 = _mm256_srai_epi16(cohvg5, 2);
+
+    // Mask 0x55: even 16-bit elements come from the rb result, odd elements from the g result
+    __m256i cohv4 = _mm256_blend_epi16(cohvg6, cohvrb6, 0x55);
+
+    // Narrow to bytes, then reorder; a shuffle index of -1 writes a zero byte
+    __m128i cohv5 = _mm_packus_epi16(_mm256_castsi256_si128(cohv4), _mm256_extracti128_si256(cohv4, 1));
+    return _mm_shuffle_epi8(cohv5, _mm_setr_epi8(6, 5, 4, -1, 2, 1, 0, -1, 10, 9, 8, -1, -1, -1, -1, -1));
+}
+
+// Result bundle filled in by Planar_AVX2.
+struct Plane
+{
+    // Encoded 64-bit block word
+    uint64_t plane;
+    // Summed squared weighted error of the encoded block against the source pixels
+    uint64_t error;
+    // Packed per-channel byte sums computed as a by-product of the encode
+    __m256i sum4;
+};
+
+// Fits the three planar colours (RGBO, RGBH, RGBV) to a block of 16 four-byte pixels, packs
+// them into a 64-bit block word and measures how well the result reproduces the source.
+//
+// src - 64 bytes of pixel data, read as four unaligned 16-byte loads.
+// Returns a Plane holding the encoded word (`plane`), the summed squared weighted error
+// (`error`) and packed per-channel byte sums (`sum4`).
+static etcpak_force_inline Plane Planar_AVX2(const uint8_t* src)
+{
+    __m128i d0 = _mm_loadu_si128(((__m128i*)src) + 0);
+    __m128i d1 = _mm_loadu_si128(((__m128i*)src) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)src) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)src) + 3);
+
+    // De-interleave each group of four pixels into per-channel runs (byte 3 of every pixel is dropped)
+    __m128i rgb0 = _mm_shuffle_epi8(d0, _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, -1, -1, -1, -1));
+    __m128i rgb1 = _mm_shuffle_epi8(d1, _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, -1, -1, -1, -1));
+    __m128i rgb2 = _mm_shuffle_epi8(d2, _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, -1, -1, -1, -1));
+    __m128i rgb3 = _mm_shuffle_epi8(d3, _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, -1, -1, -1, -1));
+
+    __m128i rg0 = _mm_unpacklo_epi32(rgb0, rgb1);
+    __m128i rg1 = _mm_unpacklo_epi32(rgb2, rgb3);
+    __m128i b0 = _mm_unpackhi_epi32(rgb0, rgb1);
+    __m128i b1 = _mm_unpackhi_epi32(rgb2, rgb3);
+
+    // swap channels
+    __m128i b8 = _mm_unpacklo_epi64(rg0, rg1);
+    __m128i g8 = _mm_unpackhi_epi64(rg0, rg1);
+    __m128i r8 = _mm_unpacklo_epi64(b0, b1);
+
+    // Byte sums of each 8-byte half of every channel
+    __m128i t0 = _mm_sad_epu8(r8, _mm_setzero_si128());
+    __m128i t1 = _mm_sad_epu8(g8, _mm_setzero_si128());
+    __m128i t2 = _mm_sad_epu8(b8, _mm_setzero_si128());
+
+    // Same sums after regrouping the bytes in pairs
+    __m128i r8s = _mm_shuffle_epi8(r8, _mm_set_epi8(0xF, 0xE, 0xB, 0xA, 0x7, 0x6, 0x3, 0x2, 0xD, 0xC, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0));
+    __m128i g8s = _mm_shuffle_epi8(g8, _mm_set_epi8(0xF, 0xE, 0xB, 0xA, 0x7, 0x6, 0x3, 0x2, 0xD, 0xC, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0));
+    __m128i b8s = _mm_shuffle_epi8(b8, _mm_set_epi8(0xF, 0xE, 0xB, 0xA, 0x7, 0x6, 0x3, 0x2, 0xD, 0xC, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0));
+
+    __m128i s0 = _mm_sad_epu8(r8s, _mm_setzero_si128());
+    __m128i s1 = _mm_sad_epu8(g8s, _mm_setzero_si128());
+    __m128i s2 = _mm_sad_epu8(b8s, _mm_setzero_si128());
+
+    __m256i sr0 = _mm256_insertf128_si256(_mm256_castsi128_si256(t0), s0, 1);
+    __m256i sg0 = _mm256_insertf128_si256(_mm256_castsi128_si256(t1), s1, 1);
+    __m256i sb0 = _mm256_insertf128_si256(_mm256_castsi128_si256(t2), s2, 1);
+
+    // Pack the three sums of every 64-bit element as fields: r at bit 32, g at bit 16, b at bit 0
+    __m256i sr1 = _mm256_slli_epi64(sr0, 32);
+    __m256i sg1 = _mm256_slli_epi64(sg0, 16);
+
+    __m256i srb = _mm256_or_si256(sr1, sb0);
+    __m256i srgb = _mm256_or_si256(srb, sg1);
+
+    // t5 = { sum(r), sum(g), sum(b), ... } over all 16 pixels
+    __m128i t3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(t0), _mm_castsi128_ps(t1), _MM_SHUFFLE(2, 0, 2, 0)));
+    __m128i t4 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3, 1, 2, 0));
+    __m128i t5 = _mm_hadd_epi32(t3, t4);
+    __m128i t6 = _mm_shuffle_epi32(t5, _MM_SHUFFLE(1, 1, 1, 1));
+    __m128i t7 = _mm_shuffle_epi32(t5, _MM_SHUFFLE(2, 2, 2, 2));
+
+    __m256i sr = _mm256_broadcastw_epi16(t5);
+    __m256i sg = _mm256_broadcastw_epi16(t6);
+    __m256i sb = _mm256_broadcastw_epi16(t7);
+
+    __m256i r08 = _mm256_cvtepu8_epi16(r8);
+    __m256i g08 = _mm256_cvtepu8_epi16(g8);
+    __m256i b08 = _mm256_cvtepu8_epi16(b8);
+
+    // 16 * pixel - channel sum, i.e. 16 times the deviation from the channel mean
+    __m256i r16 = _mm256_slli_epi16(r08, 4);
+    __m256i g16 = _mm256_slli_epi16(g08, 4);
+    __m256i b16 = _mm256_slli_epi16(b08, 4);
+
+    __m256i difR0 = _mm256_sub_epi16(r16, sr);
+    __m256i difG0 = _mm256_sub_epi16(g16, sg);
+    __m256i difB0 = _mm256_sub_epi16(b16, sb);
+
+    // Weight the deviations by the two position patterns (-255, -85, 85, 255)
+    __m256i difRyz = _mm256_madd_epi16(difR0, _mm256_set_epi16(255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255));
+    __m256i difGyz = _mm256_madd_epi16(difG0, _mm256_set_epi16(255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255));
+    __m256i difByz = _mm256_madd_epi16(difB0, _mm256_set_epi16(255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255, 255, 85, -85, -255));
+
+    __m256i difRxz = _mm256_madd_epi16(difR0, _mm256_set_epi16(255, 255, 255, 255, 85, 85, 85, 85, -85, -85, -85, -85, -255, -255, -255, -255));
+    __m256i difGxz = _mm256_madd_epi16(difG0, _mm256_set_epi16(255, 255, 255, 255, 85, 85, 85, 85, -85, -85, -85, -85, -255, -255, -255, -255));
+    __m256i difBxz = _mm256_madd_epi16(difB0, _mm256_set_epi16(255, 255, 255, 255, 85, 85, 85, 85, -85, -85, -85, -85, -255, -255, -255, -255));
+
+    __m256i difRGyz = _mm256_hadd_epi32(difRyz, difGyz);
+    __m256i difByzxz = _mm256_hadd_epi32(difByz, difBxz);
+
+    __m256i difRGxz = _mm256_hadd_epi32(difRxz, difGxz);
+
+    __m128i sumRGyz = _mm_add_epi32(_mm256_castsi256_si128(difRGyz), _mm256_extracti128_si256(difRGyz, 1));
+    __m128i sumByzxz = _mm_add_epi32(_mm256_castsi256_si128(difByzxz), _mm256_extracti128_si256(difByzxz, 1));
+    __m128i sumRGxz = _mm_add_epi32(_mm256_castsi256_si128(difRGxz), _mm256_extracti128_si256(difRGxz, 1));
+
+    __m128i sumRGByz = _mm_hadd_epi32(sumRGyz, sumByzxz);
+    __m128i sumRGByzxz = _mm_hadd_epi32(sumRGxz, sumByzxz);
+
+    __m128i sumRGBxz = _mm_shuffle_epi32(sumRGByzxz, _MM_SHUFFLE(2, 3, 1, 0));
+
+    __m128 sumRGByzf = _mm_cvtepi32_ps(sumRGByz);
+    __m128 sumRGBxzf = _mm_cvtepi32_ps(sumRGBxz);
+
+    const float value = (255 * 255 * 8.0f + 85 * 85 * 8.0f) * 16.0f;
+
+    __m128 scale = _mm_set1_ps(-4.0f / value);
+
+    __m128 af = _mm_mul_ps(sumRGBxzf, scale);
+    __m128 bf = _mm_mul_ps(sumRGByzf, scale);
+
+    __m128 df = _mm_mul_ps(_mm_cvtepi32_ps(t5), _mm_set1_ps(4.0f / 16.0f));
+
+    // calculating the three colors RGBO, RGBH, and RGBV.  RGB = df - af * x - bf * y;
+    __m128 cof0 = _mm_fnmadd_ps(af, _mm_set1_ps(-255.0f), _mm_fnmadd_ps(bf, _mm_set1_ps(-255.0f), df));
+    __m128 chf0 = _mm_fnmadd_ps(af, _mm_set1_ps( 425.0f), _mm_fnmadd_ps(bf, _mm_set1_ps(-255.0f), df));
+    __m128 cvf0 = _mm_fnmadd_ps(af, _mm_set1_ps(-255.0f), _mm_fnmadd_ps(bf, _mm_set1_ps( 425.0f), df));
+
+    // convert to r6g7b6
+    __m128i cohv = r6g7b6_AVX2(cof0, chf0, cvf0);
+
+    uint64_t rgbho = _mm_extract_epi64(cohv, 0);
+    uint32_t rgbv0 = _mm_extract_epi32(cohv, 2);
+
+    // Error calculation
+    // Expand the quantised O colour back to 8 bits by bit replication, then scale by 4 with a rounding term of 2
+    auto ro0 = (rgbho >> 48) & 0x3F;
+    auto go0 = (rgbho >> 40) & 0x7F;
+    auto bo0 = (rgbho >> 32) & 0x3F;
+    auto ro1 = (ro0 >> 4) | (ro0 << 2);
+    auto go1 = (go0 >> 6) | (go0 << 1);
+    auto bo1 = (bo0 >> 4) | (bo0 << 2);
+    auto ro2 = (ro1 << 2) + 2;
+    auto go2 = (go1 << 2) + 2;
+    auto bo2 = (bo1 << 2) + 2;
+
+    __m256i ro3 = _mm256_set1_epi16(ro2);
+    __m256i go3 = _mm256_set1_epi16(go2);
+    __m256i bo3 = _mm256_set1_epi16(bo2);
+
+    auto rh0 = (rgbho >> 16) & 0x3F;
+    auto gh0 = (rgbho >>  8) & 0x7F;
+    auto bh0 = (rgbho >>  0) & 0x3F;
+    auto rh1 = (rh0 >> 4) | (rh0 << 2);
+    auto gh1 = (gh0 >> 6) | (gh0 << 1);
+    auto bh1 = (bh0 >> 4) | (bh0 << 2);
+
+    // Per-step colour deltas (H - O); the unsigned wrap-around is reinterpreted as a signed 16-bit value below
+    auto rh2 = rh1 - ro1;
+    auto gh2 = gh1 - go1;
+    auto bh2 = bh1 - bo1;
+
+    __m256i rh3 = _mm256_set1_epi16(rh2);
+    __m256i gh3 = _mm256_set1_epi16(gh2);
+    __m256i bh3 = _mm256_set1_epi16(bh2);
+
+    auto rv0 = (rgbv0 >> 16) & 0x3F;
+    auto gv0 = (rgbv0 >>  8) & 0x7F;
+    auto bv0 = (rgbv0 >>  0) & 0x3F;
+    auto rv1 = (rv0 >> 4) | (rv0 << 2);
+    auto gv1 = (gv0 >> 6) | (gv0 << 1);
+    auto bv1 = (bv0 >> 4) | (bv0 << 2);
+
+    // Per-step colour deltas (V - O)
+    auto rv2 = rv1 - ro1;
+    auto gv2 = gv1 - go1;
+    auto bv2 = bv1 - bo1;
+
+    __m256i rv3 = _mm256_set1_epi16(rv2);
+    __m256i gv3 = _mm256_set1_epi16(gv2);
+    __m256i bv3 = _mm256_set1_epi16(bv2);
+
+    __m256i x = _mm256_set_epi16(3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0);
+
+    __m256i rh4 = _mm256_mullo_epi16(rh3, x);
+    __m256i gh4 = _mm256_mullo_epi16(gh3, x);
+    __m256i bh4 = _mm256_mullo_epi16(bh3, x);
+
+    __m256i y = _mm256_set_epi16(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0);
+
+    __m256i rv4 = _mm256_mullo_epi16(rv3, y);
+    __m256i gv4 = _mm256_mullo_epi16(gv3, y);
+    __m256i bv4 = _mm256_mullo_epi16(bv3, y);
+
+    __m256i rxy = _mm256_add_epi16(rh4, rv4);
+    __m256i gxy = _mm256_add_epi16(gh4, gv4);
+    __m256i bxy = _mm256_add_epi16(bh4, bv4);
+
+    // Predicted pixel = (x * (H - O) + y * (V - O) + 4 * O + 2) >> 2, clamped to 0..255
+    __m256i rp0 = _mm256_add_epi16(rxy, ro3);
+    __m256i gp0 = _mm256_add_epi16(gxy, go3);
+    __m256i bp0 = _mm256_add_epi16(bxy, bo3);
+
+    __m256i rp1 = _mm256_srai_epi16(rp0, 2);
+    __m256i gp1 = _mm256_srai_epi16(gp0, 2);
+    __m256i bp1 = _mm256_srai_epi16(bp0, 2);
+
+    __m256i rp2 = _mm256_max_epi16(_mm256_min_epi16(rp1, _mm256_set1_epi16(255)), _mm256_setzero_si256());
+    __m256i gp2 = _mm256_max_epi16(_mm256_min_epi16(gp1, _mm256_set1_epi16(255)), _mm256_setzero_si256());
+    __m256i bp2 = _mm256_max_epi16(_mm256_min_epi16(bp1, _mm256_set1_epi16(255)), _mm256_setzero_si256());
+
+    __m256i rdif = _mm256_sub_epi16(r08, rp2);
+    __m256i gdif = _mm256_sub_epi16(g08, gp2);
+    __m256i bdif = _mm256_sub_epi16(b08, bp2);
+
+    // Weighted difference per pixel: |38 r + 76 g + 14 b| <= 128 * 255, which still fits a signed 16-bit element
+    __m256i rerr = _mm256_mullo_epi16(rdif, _mm256_set1_epi16(38));
+    __m256i gerr = _mm256_mullo_epi16(gdif, _mm256_set1_epi16(76));
+    __m256i berr = _mm256_mullo_epi16(bdif, _mm256_set1_epi16(14));
+
+    __m256i sum0 = _mm256_add_epi16(rerr, gerr);
+    __m256i sum1 = _mm256_add_epi16(sum0, berr);
+
+    __m256i sum2 = _mm256_madd_epi16(sum1, sum1);
+
+    __m128i sum3 = _mm_add_epi32(_mm256_castsi256_si128(sum2), _mm256_extracti128_si256(sum2, 1));
+
+    uint32_t err0 = _mm_extract_epi32(sum3, 0);
+    uint32_t err1 = _mm_extract_epi32(sum3, 1);
+    uint32_t err2 = _mm_extract_epi32(sum3, 2);
+    uint32_t err3 = _mm_extract_epi32(sum3, 3);
+
+    // Widen before adding: each partial sum can approach 2^32, so a 32-bit addition could wrap
+    // around and report a badly fitting block as (nearly) error free.
+    uint64_t error = static_cast<uint64_t>(err0) + err1 + err2 + err3;
+    /**/
+
+    // Squeeze the byte-aligned 6/7/6-bit fields together and distribute them over the two output words
+    uint32_t rgbv = ( rgbv0 & 0x3F ) | ( ( rgbv0 >> 2 ) & 0x1FC0 ) | ( ( rgbv0 >> 3 ) & 0x7E000 );
+    uint64_t rgbho0_ = ( rgbho & 0x3F0000003F ) | ( ( rgbho >> 2 ) & 0x1FC000001FC0 ) | ( ( rgbho >> 3 ) & 0x7E0000007E000 );
+    uint64_t rgbho0 = ( rgbho0_ & 0x7FFFF ) | ( ( rgbho0_ >> 13 ) & 0x3FFFF80000 );
+
+    uint32_t hi = rgbv | ((rgbho0 & 0x1FFF) << 19);
+    rgbho0 >>= 13;
+    uint32_t lo = ( rgbho0 & 0x1 ) | ( ( rgbho0 & 0x1FE ) << 1 ) | ( ( rgbho0 & 0x600 ) << 2 ) | ( ( rgbho0 & 0x3F800 ) << 5 ) | ( ( rgbho0 & 0x1FC0000 ) << 6 );
+
+    // Six bits taken from the O colour select the extra flag bits from g_flags
+    uint32_t idx = ( ( rgbho >> 33 ) & 0xF ) | ( ( rgbho >> 41 ) & 0x10 ) | ( ( rgbho >> 48 ) & 0x20 );
+    lo |= g_flags[idx];
+    uint64_t result = static_cast<uint32_t>(_bswap(lo));
+    result |= static_cast<uint64_t>(static_cast<uint32_t>(_bswap(hi))) << 32;
+
+    Plane plane;
+
+    plane.plane = result;
+    plane.error = error;
+    plane.sum4 = _mm256_permute4x64_epi64(srgb, _MM_SHUFFLE(2, 3, 0, 1));
+
+    return plane;
+}
+
+// Variant of EncodeSelectors_AVX2 that competes against an already encoded alternative:
+// when the best table-based error is not lower than `error`, `value` is returned unchanged.
+//
+// d      - partially built block word; the chosen indices are OR-ed in at bits 26 and 29.
+// terr   - two rows of eight 32-bit errors; read with aligned 256-bit loads (32-byte alignment required).
+// tsel   - eight 32-bit selector words, indexed by the chosen table index.
+// rotate - selects which bit masks split the selector word between the two rows.
+// value  - encoded block returned when the alternative wins or ties.
+// error  - error of that alternative.
+//          NOTE(review): this is a 32-bit parameter and the sum it is compared with is also
+//          computed in 32 bits - confirm callers never pass a larger value.
+static etcpak_force_inline uint64_t EncodeSelectors_AVX2( uint64_t d, const uint32_t terr[2][8], const uint32_t tsel[8], const bool rotate, const uint64_t value, const uint32_t error) noexcept
+{
+    size_t tidx[2];
+
+    // Get index of minimum error (terr[0] and terr[1])
+    __m256i err0 = _mm256_load_si256((const __m256i*)terr[0]);
+    __m256i err1 = _mm256_load_si256((const __m256i*)terr[1]);
+
+    // Regroup so the low lane reduces terr[0] and the high lane reduces terr[1]
+    __m256i errLo = _mm256_permute2x128_si256(err0, err1, (0) | (2 << 4));
+    __m256i errHi = _mm256_permute2x128_si256(err0, err1, (1) | (3 << 4));
+
+    __m256i errMin0 = _mm256_min_epu32(errLo, errHi);
+
+    // Horizontal minimum inside each 128-bit lane
+    __m256i errMin1 = _mm256_shuffle_epi32(errMin0, _MM_SHUFFLE(2, 3, 0, 1));
+    __m256i errMin2 = _mm256_min_epu32(errMin0, errMin1);
+
+    __m256i errMin3 = _mm256_shuffle_epi32(errMin2, _MM_SHUFFLE(1, 0, 3, 2));
+    __m256i errMin4 = _mm256_min_epu32(errMin3, errMin2);
+
+    // Broadcast each row's minimum to all eight elements
+    __m256i errMin5 = _mm256_permute2x128_si256(errMin4, errMin4, (0) | (0 << 4));
+    __m256i errMin6 = _mm256_permute2x128_si256(errMin4, errMin4, (1) | (1 << 4));
+
+    __m256i errMask0 = _mm256_cmpeq_epi32(errMin5, err0);
+    __m256i errMask1 = _mm256_cmpeq_epi32(errMin6, err1);
+
+    uint32_t mask0 = _mm256_movemask_epi8(errMask0);
+    uint32_t mask1 = _mm256_movemask_epi8(errMask1);
+
+    // movemask yields four bits per 32-bit element, so bit position / 4 is the index of the
+    // first (lowest-index) entry equal to the minimum
+    tidx[0] = _bit_scan_forward(mask0) >> 2;
+    tidx[1] = _bit_scan_forward(mask1) >> 2;
+
+    // Keep the alternative encoding unless the table-based one is strictly better
+    if ((terr[0][tidx[0]] + terr[1][tidx[1]]) >= error)
+    {
+        return value;
+    }
+
+    d |= tidx[0] << 26;
+    d |= tidx[1] << 29;
+
+    unsigned int t0 = tsel[tidx[0]];
+    unsigned int t1 = tsel[tidx[1]];
+
+    // Keep complementary bit sets of the two selector words so they can be OR-ed together
+    if (!rotate)
+    {
+        t0 &= 0xFF00FF00;
+        t1 &= 0x00FF00FF;
+    }
+    else
+    {
+        t0 &= 0xCCCCCCCC;
+        t1 &= 0x33333333;
+    }
+
+    // Flip selectors from sign bit
+    unsigned int t2 = (t0 | t1) ^ 0xFFFF0000;
+
+    return d | static_cast<uint64_t>(_bswap(t2)) << 32;
+}
+
+#endif
+
+// Computes the rounded average colour of the four 8-pixel halves of a block of 16 four-byte pixels.
+//
+// data - 64 bytes of pixel data; bytes 0 / 1 / 2 of each pixel are accumulated as b / g / r.
+// a    - output, four entries written as { r, g, b, x }, each component being (sum + 4) / 8.
+//        With j the index of the 16-byte group and i the pixel inside the group:
+//        a[0] covers j >= 2, a[1] covers j < 2, a[2] covers i >= 2, a[3] covers i < 2.
+static etcpak_force_inline void Average( const uint8_t* data, v4i* a )
+{
+#ifdef __SSE4_1__
+    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
+    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
+
+    // Widen bytes to 16 bit: *l holds pixels 0-1 of a group, *h holds pixels 2-3
+    __m128i d0l = _mm_unpacklo_epi8(d0, _mm_setzero_si128());
+    __m128i d0h = _mm_unpackhi_epi8(d0, _mm_setzero_si128());
+    __m128i d1l = _mm_unpacklo_epi8(d1, _mm_setzero_si128());
+    __m128i d1h = _mm_unpackhi_epi8(d1, _mm_setzero_si128());
+    __m128i d2l = _mm_unpacklo_epi8(d2, _mm_setzero_si128());
+    __m128i d2h = _mm_unpackhi_epi8(d2, _mm_setzero_si128());
+    __m128i d3l = _mm_unpacklo_epi8(d3, _mm_setzero_si128());
+    __m128i d3h = _mm_unpackhi_epi8(d3, _mm_setzero_si128());
+
+    __m128i sum0 = _mm_add_epi16(d0l, d1l);
+    __m128i sum1 = _mm_add_epi16(d0h, d1h);
+    __m128i sum2 = _mm_add_epi16(d2l, d3l);
+    __m128i sum3 = _mm_add_epi16(d2h, d3h);
+
+    // Widen to 32 bit so the two pixels of each partial sum can be added lane-wise
+    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
+    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
+    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
+    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
+    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
+    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
+    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
+    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
+
+    // b0..b3: per-channel sums of the four 2x2 quadrants
+    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
+    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
+    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
+    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
+
+    // Combine two quadrants into each half and divide by 8 with rounding
+    __m128i a0 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b2, b3), _mm_set1_epi32(4)), 3);
+    __m128i a1 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b1), _mm_set1_epi32(4)), 3);
+    __m128i a2 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b1, b3), _mm_set1_epi32(4)), 3);
+    __m128i a3 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b2), _mm_set1_epi32(4)), 3);
+
+    // The shuffle reverses the first three lanes (b, g, r -> r, g, b) and keeps lane 3.
+    // NOTE(review): unlike the NEON and scalar branches, element 3 of each result holds the
+    // average of the fourth pixel byte here rather than 0 - confirm no consumer relies on zero.
+    _mm_storeu_si128((__m128i*)&a[0], _mm_packus_epi32(_mm_shuffle_epi32(a0, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a1, _MM_SHUFFLE(3, 0, 1, 2))));
+    _mm_storeu_si128((__m128i*)&a[2], _mm_packus_epi32(_mm_shuffle_epi32(a2, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a3, _MM_SHUFFLE(3, 0, 1, 2))));
+#elif defined __ARM_NEON
+    // Zipping with a zero vector widens every byte to 16 bit
+    uint8x16x2_t t0 = vzipq_u8(vld1q_u8(data +  0), uint8x16_t());
+    uint8x16x2_t t1 = vzipq_u8(vld1q_u8(data + 16), uint8x16_t());
+    uint8x16x2_t t2 = vzipq_u8(vld1q_u8(data + 32), uint8x16_t());
+    uint8x16x2_t t3 = vzipq_u8(vld1q_u8(data + 48), uint8x16_t());
+
+    uint16x8x2_t d0 = { vreinterpretq_u16_u8(t0.val[0]), vreinterpretq_u16_u8(t0.val[1]) };
+    uint16x8x2_t d1 = { vreinterpretq_u16_u8(t1.val[0]), vreinterpretq_u16_u8(t1.val[1]) };
+    uint16x8x2_t d2 = { vreinterpretq_u16_u8(t2.val[0]), vreinterpretq_u16_u8(t2.val[1]) };
+    uint16x8x2_t d3 = { vreinterpretq_u16_u8(t3.val[0]), vreinterpretq_u16_u8(t3.val[1]) };
+
+    // Add pairs of groups, then zip with zero again to widen the sums to 32 bit
+    uint16x8x2_t s0 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d0.val[0] ), vreinterpretq_s16_u16( d1.val[0] ) ) ), uint16x8_t());
+    uint16x8x2_t s1 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d0.val[1] ), vreinterpretq_s16_u16( d1.val[1] ) ) ), uint16x8_t());
+    uint16x8x2_t s2 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d2.val[0] ), vreinterpretq_s16_u16( d3.val[0] ) ) ), uint16x8_t());
+    uint16x8x2_t s3 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d2.val[1] ), vreinterpretq_s16_u16( d3.val[1] ) ) ), uint16x8_t());
+
+    uint32x4x2_t sum0 = { vreinterpretq_u32_u16(s0.val[0]), vreinterpretq_u32_u16(s0.val[1]) };
+    uint32x4x2_t sum1 = { vreinterpretq_u32_u16(s1.val[0]), vreinterpretq_u32_u16(s1.val[1]) };
+    uint32x4x2_t sum2 = { vreinterpretq_u32_u16(s2.val[0]), vreinterpretq_u32_u16(s2.val[1]) };
+    uint32x4x2_t sum3 = { vreinterpretq_u32_u16(s3.val[0]), vreinterpretq_u32_u16(s3.val[1]) };
+
+    // b0..b3: per-channel sums of the four 2x2 quadrants
+    uint32x4_t b0 = vaddq_u32(sum0.val[0], sum0.val[1]);
+    uint32x4_t b1 = vaddq_u32(sum1.val[0], sum1.val[1]);
+    uint32x4_t b2 = vaddq_u32(sum2.val[0], sum2.val[1]);
+    uint32x4_t b3 = vaddq_u32(sum3.val[0], sum3.val[1]);
+
+    // Combine two quadrants into each half and divide by 8 with rounding
+    uint32x4_t a0 = vshrq_n_u32(vqaddq_u32(vqaddq_u32(b2, b3), vdupq_n_u32(4)), 3);
+    uint32x4_t a1 = vshrq_n_u32(vqaddq_u32(vqaddq_u32(b0, b1), vdupq_n_u32(4)), 3);
+    uint32x4_t a2 = vshrq_n_u32(vqaddq_u32(vqaddq_u32(b1, b3), vdupq_n_u32(4)), 3);
+    uint32x4_t a3 = vshrq_n_u32(vqaddq_u32(vqaddq_u32(b0, b2), vdupq_n_u32(4)), 3);
+
+    uint16x8_t o0 = vcombine_u16(vqmovun_s32(vreinterpretq_s32_u32( a0 )), vqmovun_s32(vreinterpretq_s32_u32( a1 )));
+    uint16x8_t o1 = vcombine_u16(vqmovun_s32(vreinterpretq_s32_u32( a2 )), vqmovun_s32(vreinterpretq_s32_u32( a3 )));
+
+    // Reverse the channel order and force the fourth component to 0
+    a[0] = v4i{o0[2], o0[1], o0[0], 0};
+    a[1] = v4i{o0[6], o0[5], o0[4], 0};
+    a[2] = v4i{o1[2], o1[1], o1[0], 0};
+    a[3] = v4i{o1[6], o1[5], o1[4], 0};
+#else
+    // Portable fallback: one accumulator per quadrant and channel
+    uint32_t r[4];
+    uint32_t g[4];
+    uint32_t b[4];
+
+    memset(r, 0, sizeof(r));
+    memset(g, 0, sizeof(g));
+    memset(b, 0, sizeof(b));
+
+    for( int j=0; j<4; j++ )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            // Quadrant index: bit 1 from j >= 2, bit 0 from i >= 2
+            int index = (j & 2) + (i >> 1);
+            b[index] += *data++;
+            g[index] += *data++;
+            r[index] += *data++;
+            // Skip the fourth byte of the pixel
+            data++;
+        }
+    }
+
+    a[0] = v4i{ uint16_t( (r[2] + r[3] + 4) / 8 ), uint16_t( (g[2] + g[3] + 4) / 8 ), uint16_t( (b[2] + b[3] + 4) / 8 ), 0};
+    a[1] = v4i{ uint16_t( (r[0] + r[1] + 4) / 8 ), uint16_t( (g[0] + g[1] + 4) / 8 ), uint16_t( (b[0] + b[1] + 4) / 8 ), 0};
+    a[2] = v4i{ uint16_t( (r[1] + r[3] + 4) / 8 ), uint16_t( (g[1] + g[3] + 4) / 8 ), uint16_t( (b[1] + b[3] + 4) / 8 ), 0};
+    a[3] = v4i{ uint16_t( (r[0] + r[2] + 4) / 8 ), uint16_t( (g[0] + g[2] + 4) / 8 ), uint16_t( (b[0] + b[2] + 4) / 8 ), 0};
+#endif
+}
+
+// Sums the first three bytes of every pixel over the four 8-pixel halves of a block of
+// 16 four-byte pixels.
+//
+// data - 64 bytes of pixel data.
+// err  - output; err[k][c] is the sum of byte c (c = 0..2) over half k and err[k][3] is 0.
+//        With j the index of the 16-byte group and i the pixel inside the group:
+//        err[0] covers j >= 2, err[1] covers j < 2, err[2] covers i >= 2, err[3] covers i < 2.
+static etcpak_force_inline void CalcErrorBlock( const uint8_t* data, unsigned int err[4][4] )
+{
+#ifdef __SSE4_1__
+    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
+    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
+
+    // Clear the fourth byte of every pixel so it does not contribute to the sums
+    __m128i dm0 = _mm_and_si128(d0, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm1 = _mm_and_si128(d1, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm2 = _mm_and_si128(d2, _mm_set1_epi32(0x00FFFFFF));
+    __m128i dm3 = _mm_and_si128(d3, _mm_set1_epi32(0x00FFFFFF));
+
+    // Widen bytes to 16 bit: *l holds pixels 0-1 of a group, *h holds pixels 2-3
+    __m128i d0l = _mm_unpacklo_epi8(dm0, _mm_setzero_si128());
+    __m128i d0h = _mm_unpackhi_epi8(dm0, _mm_setzero_si128());
+    __m128i d1l = _mm_unpacklo_epi8(dm1, _mm_setzero_si128());
+    __m128i d1h = _mm_unpackhi_epi8(dm1, _mm_setzero_si128());
+    __m128i d2l = _mm_unpacklo_epi8(dm2, _mm_setzero_si128());
+    __m128i d2h = _mm_unpackhi_epi8(dm2, _mm_setzero_si128());
+    __m128i d3l = _mm_unpacklo_epi8(dm3, _mm_setzero_si128());
+    __m128i d3h = _mm_unpackhi_epi8(dm3, _mm_setzero_si128());
+
+    __m128i sum0 = _mm_add_epi16(d0l, d1l);
+    __m128i sum1 = _mm_add_epi16(d0h, d1h);
+    __m128i sum2 = _mm_add_epi16(d2l, d3l);
+    __m128i sum3 = _mm_add_epi16(d2h, d3h);
+
+    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
+    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
+    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
+    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
+    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
+    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
+    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
+    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
+
+    // b0..b3: per-channel sums of the four 2x2 quadrants
+    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
+    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
+    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
+    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
+
+    __m128i a0 = _mm_add_epi32(b2, b3);
+    __m128i a1 = _mm_add_epi32(b0, b1);
+    __m128i a2 = _mm_add_epi32(b1, b3);
+    __m128i a3 = _mm_add_epi32(b0, b2);
+
+    _mm_storeu_si128((__m128i*)&err[0], a0);
+    _mm_storeu_si128((__m128i*)&err[1], a1);
+    _mm_storeu_si128((__m128i*)&err[2], a2);
+    _mm_storeu_si128((__m128i*)&err[3], a3);
+#elif defined __ARM_NEON
+    // Clear the fourth byte of every pixel before summing, exactly like the SSE branch does.
+    // Masking the finished 32-bit sums instead would leave the fourth-byte sum in err[k][3].
+    const uint8x16_t pixelMask = vreinterpretq_u8_u32(vdupq_n_u32(0x00FFFFFF));
+
+    // Zipping with a zero vector widens every byte to 16 bit
+    uint8x16x2_t t0 = vzipq_u8(vandq_u8(vld1q_u8(data +  0), pixelMask), uint8x16_t());
+    uint8x16x2_t t1 = vzipq_u8(vandq_u8(vld1q_u8(data + 16), pixelMask), uint8x16_t());
+    uint8x16x2_t t2 = vzipq_u8(vandq_u8(vld1q_u8(data + 32), pixelMask), uint8x16_t());
+    uint8x16x2_t t3 = vzipq_u8(vandq_u8(vld1q_u8(data + 48), pixelMask), uint8x16_t());
+
+    uint16x8x2_t d0 = { vreinterpretq_u16_u8(t0.val[0]), vreinterpretq_u16_u8(t0.val[1]) };
+    uint16x8x2_t d1 = { vreinterpretq_u16_u8(t1.val[0]), vreinterpretq_u16_u8(t1.val[1]) };
+    uint16x8x2_t d2 = { vreinterpretq_u16_u8(t2.val[0]), vreinterpretq_u16_u8(t2.val[1]) };
+    uint16x8x2_t d3 = { vreinterpretq_u16_u8(t3.val[0]), vreinterpretq_u16_u8(t3.val[1]) };
+
+    // Add pairs of groups, then zip with zero again to widen the sums to 32 bit
+    uint16x8x2_t s0 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d0.val[0] ), vreinterpretq_s16_u16( d1.val[0] ))), uint16x8_t());
+    uint16x8x2_t s1 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d0.val[1] ), vreinterpretq_s16_u16( d1.val[1] ))), uint16x8_t());
+    uint16x8x2_t s2 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d2.val[0] ), vreinterpretq_s16_u16( d3.val[0] ))), uint16x8_t());
+    uint16x8x2_t s3 = vzipq_u16(vreinterpretq_u16_s16( vaddq_s16(vreinterpretq_s16_u16( d2.val[1] ), vreinterpretq_s16_u16( d3.val[1] ))), uint16x8_t());
+
+    uint32x4x2_t sum0 = { vreinterpretq_u32_u16(s0.val[0]), vreinterpretq_u32_u16(s0.val[1]) };
+    uint32x4x2_t sum1 = { vreinterpretq_u32_u16(s1.val[0]), vreinterpretq_u32_u16(s1.val[1]) };
+    uint32x4x2_t sum2 = { vreinterpretq_u32_u16(s2.val[0]), vreinterpretq_u32_u16(s2.val[1]) };
+    uint32x4x2_t sum3 = { vreinterpretq_u32_u16(s3.val[0]), vreinterpretq_u32_u16(s3.val[1]) };
+
+    // b0..b3: per-channel sums of the four 2x2 quadrants
+    uint32x4_t b0 = vaddq_u32(sum0.val[0], sum0.val[1]);
+    uint32x4_t b1 = vaddq_u32(sum1.val[0], sum1.val[1]);
+    uint32x4_t b2 = vaddq_u32(sum2.val[0], sum2.val[1]);
+    uint32x4_t b3 = vaddq_u32(sum3.val[0], sum3.val[1]);
+
+    uint32x4_t a0 = vqaddq_u32(b2, b3);
+    uint32x4_t a1 = vqaddq_u32(b0, b1);
+    uint32x4_t a2 = vqaddq_u32(b1, b3);
+    uint32x4_t a3 = vqaddq_u32(b0, b2);
+
+    vst1q_u32(err[0], a0);
+    vst1q_u32(err[1], a1);
+    vst1q_u32(err[2], a2);
+    vst1q_u32(err[3], a3);
+#else
+    // Portable fallback: terr[q][c] accumulates byte c over quadrant q
+    unsigned int terr[4][4];
+
+    memset(terr, 0, 16 * sizeof(unsigned int));
+
+    for( int j=0; j<4; j++ )
+    {
+        for( int i=0; i<4; i++ )
+        {
+            // Quadrant index: bit 1 from j >= 2, bit 0 from i >= 2
+            int index = (j & 2) + (i >> 1);
+            unsigned int d = *data++;
+            terr[index][0] += d;
+            d = *data++;
+            terr[index][1] += d;
+            d = *data++;
+            terr[index][2] += d;
+            // Skip the fourth byte of the pixel
+            data++;
+        }
+    }
+
+    for( int i=0; i<3; i++ )
+    {
+        err[0][i] = terr[2][i] + terr[3][i];
+        err[1][i] = terr[0][i] + terr[1][i];
+        err[2][i] = terr[1][i] + terr[3][i];
+        err[3][i] = terr[0][i] + terr[2][i];
+    }
+    for( int i=0; i<4; i++ )
+    {
+        err[i][3] = 0;
+    }
+#endif
+}
+
+static etcpak_force_inline unsigned int CalcError( const unsigned int block[4], const v4i& average )
+{
+    // Computes bias - 2 * ( block . average ) + 8 * |average|^2 in unsigned arithmetic.
+    // block[0] pairs with average[2] and block[2] with average[0]; index 1 pairs with itself.
+    const unsigned int bias = 0x3FFFFFFF; // big enough to keep the result non-negative, small enough to avoid overflow
+    const unsigned int cross = block[0] * 2 * average[2] + block[1] * 2 * average[1] + block[2] * 2 * average[0];
+    const unsigned int squares = 8 * ( sq( average[0] ) + sq( average[1] ) + sq( average[2] ) );
+    return bias - cross + squares;
+}
+
+static etcpak_force_inline void ProcessAverages( v4i* a ) // Quantizes a[0..3] two ways: first pass writes a[4..7], second pass overwrites a[0..3] in place.
+{ // NOTE(review): the 31/15 factors and the [-4, 3] delta clamp look like ETC1 differential (5+3 bit) and individual (4 bit) colour precision - confirm.
+#ifdef __SSE4_1__ // assumes v4i is four 16-bit lanes, so one 128-bit access spans a[i*2] and a[i*2+1] - TODO confirm
+    for( int i=0; i<2; i++ )
+    {
+        __m128i d = _mm_loadu_si128((__m128i*)a[i*2].data());
+
+        __m128i t = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(31)), _mm_set1_epi16(128)); // t = d * 31 + 128
+
+        __m128i c = _mm_srli_epi16(_mm_add_epi16(t, _mm_srli_epi16(t, 8)), 8); // c = (t + (t >> 8)) >> 8
+
+        __m128i c1 = _mm_shuffle_epi32(c, _MM_SHUFFLE(3, 2, 3, 2)); // upper 64 bits of c copied into both halves
+        __m128i diff = _mm_sub_epi16(c, c1); // lower half: first - second; upper half: 0
+        diff = _mm_max_epi16(diff, _mm_set1_epi16(-4)); // clamp the difference to [-4, 3]
+        diff = _mm_min_epi16(diff, _mm_set1_epi16(3));
+
+        __m128i co = _mm_add_epi16(c1, diff); // second + clamped difference
+
+        c = _mm_blend_epi16(co, c, 0xF0); // lanes 0..3 from co, lanes 4..7 from c (the second value, unchanged)
+
+        __m128i a0 = _mm_or_si128(_mm_slli_epi16(c, 3), _mm_srli_epi16(c, 2)); // (c << 3) | (c >> 2)
+
+        _mm_storeu_si128((__m128i*)a[4+i*2].data(), a0); // 16-byte store starting at a[4+i*2]
+    }
+
+    for( int i=0; i<2; i++ ) // second pass: re-reads the still unmodified a[0..3]
+    {
+        __m128i d = _mm_loadu_si128((__m128i*)a[i*2].data());
+
+        __m128i t0 = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(15)), _mm_set1_epi16(128)); // t0 = d * 15 + 128
+        __m128i t1 = _mm_srli_epi16(_mm_add_epi16(t0, _mm_srli_epi16(t0, 8)), 8); // t1 = (t0 + (t0 >> 8)) >> 8
+
+        __m128i t2 = _mm_or_si128(t1, _mm_slli_epi16(t1, 4)); // t1 | (t1 << 4)
+
+        _mm_storeu_si128((__m128i*)a[i*2].data(), t2); // overwrite the input in place
+    }
+#elif defined __ARM_NEON // same steps as the SSE4.1 path, expressed with NEON 16-bit lane operations
+    for( int i=0; i<2; i++ )
+    {
+        int16x8_t d = vld1q_s16((int16_t*)&a[i*2]);
+        int16x8_t t = vaddq_s16(vmulq_s16(d, vdupq_n_s16(31)), vdupq_n_s16(128)); // t = d * 31 + 128
+        int16x8_t c = vshrq_n_s16(vaddq_s16(t, vshrq_n_s16(t, 8)), 8); // c = (t + (t >> 8)) >> 8
+
+        int16x8_t c1 = vcombine_s16(vget_high_s16(c), vget_high_s16(c)); // upper half of c copied into both halves
+        int16x8_t diff = vsubq_s16(c, c1);
+        diff = vmaxq_s16(diff, vdupq_n_s16(-4)); // clamp the difference to [-4, 3]
+        diff = vminq_s16(diff, vdupq_n_s16(3));
+
+        int16x8_t co = vaddq_s16(c1, diff);
+
+        c = vcombine_s16(vget_low_s16(co), vget_high_s16(c)); // lower half from co, upper half from c
+
+        int16x8_t a0 = vorrq_s16(vshlq_n_s16(c, 3), vshrq_n_s16(c, 2)); // (c << 3) | (c >> 2)
+
+        vst1q_s16((int16_t*)&a[4+i*2], a0);
+    }
+
+    for( int i=0; i<2; i++ ) // second pass: re-reads the still unmodified a[0..3]
+    {
+        int16x8_t d = vld1q_s16((int16_t*)&a[i*2]);
+
+        int16x8_t t0 = vaddq_s16(vmulq_s16(d, vdupq_n_s16(15)), vdupq_n_s16(128)); // t0 = d * 15 + 128
+        int16x8_t t1 = vshrq_n_s16(vaddq_s16(t0, vshrq_n_s16(t0, 8)), 8); // t1 = (t0 + (t0 >> 8)) >> 8
+
+        int16x8_t t2 = vorrq_s16(t1, vshlq_n_s16(t1, 4)); // t1 | (t1 << 4)
+
+        vst1q_s16((int16_t*)&a[i*2], t2);
+    }
+#else // scalar path; note it touches only channels 0..2, while the SIMD paths process all four lanes
+    for( int i=0; i<2; i++ )
+    {
+        for( int j=0; j<3; j++ )
+        {
+            int32_t c1 = mul8bit( a[i*2+1][j], 31 ); // mul8bit is defined elsewhere; presumably the rounded x * 31 / 255 of the SIMD paths - confirm
+            int32_t c2 = mul8bit( a[i*2][j], 31 );
+
+            int32_t diff = c2 - c1;
+            if( diff > 3 ) diff = 3; // clamp the difference to [-4, 3]
+            else if( diff < -4 ) diff = -4;
+
+            int32_t co = c1 + diff;
+
+            a[5+i*2][j] = ( c1 << 3 ) | ( c1 >> 2 ); // second value, unchanged
+            a[4+i*2][j] = ( co << 3 ) | ( co >> 2 ); // second value + clamped difference
+        }
+    }
+
+    for( int i=0; i<4; i++ ) // second pass: overwrite a[0..3] via the g_avg2 lookup table (defined elsewhere)
+    {
+        a[i][0] = g_avg2[mul8bit( a[i][0], 15 )]; // presumably g_avg2[v] == v | (v << 4), matching the SIMD paths - confirm
+        a[i][1] = g_avg2[mul8bit( a[i][1], 15 )];
+        a[i][2] = g_avg2[mul8bit( a[i][2], 15 )];
+    }
+#endif
+}
+
+static etcpak_force_inline void EncodeAverages( uint64_t& _d, const v4i* a, size_t idx )
+{
+    // ORs idx (shifted to bit 24) and the colour pair a[2*idx], a[2*idx+1] into _d.
+    const v4i& first = a[( idx << 1 ) + 0];
+    const v4i& second = a[( idx << 1 ) + 1];
+    const bool separate = ( idx & 0x2 ) == 0;
+    uint64_t packed = _d | ( idx << 24 );
+
+    for( int ch = 0; ch < 3; ++ch )
+    {
+        const int shift = ch * 8;
+        if( separate )
+        {
+            // Two independent 4-bit values per byte: first in the low nibble, second in the high nibble.
+            packed |= uint64_t( first[ch] >> 4 ) << shift;
+            packed |= uint64_t( second[ch] >> 4 ) << ( shift + 4 );
+            continue;
+        }
+
+        // Otherwise: top 5 bits of the second colour, with ( first - second ) >> 3 masked to 3 bits in the low bits.
+        const int32_t delta = ( ( first[ch] & 0xF8 ) - ( second[ch] & 0xF8 ) ) >> 3;
+        packed |= uint64_t( second[ch] & 0xF8 ) << shift;
+        packed |= uint64_t( delta & ~0xFFFFFFF8 ) << shift;
+    }
+    _d = packed;
+}
+
+static etcpak_force_inline uint64_t CheckSolid( const uint8_t* src ) // Returns a packed value when all 16 four-byte pixels at src are identical, otherwise 0.
+{ // Reads 64 bytes from src; every path compares pixels 1..15 against pixel 0, including the 4th byte.
+#ifdef __SSE4_1__
+    __m128i d0 = _mm_loadu_si128(((__m128i*)src) + 0); // four pixels per 128-bit load
+    __m128i d1 = _mm_loadu_si128(((__m128i*)src) + 1);
+    __m128i d2 = _mm_loadu_si128(((__m128i*)src) + 2);
+    __m128i d3 = _mm_loadu_si128(((__m128i*)src) + 3);
+
+    __m128i c = _mm_shuffle_epi32(d0, _MM_SHUFFLE(0, 0, 0, 0)); // first pixel broadcast to all four 32-bit lanes
+
+    __m128i c0 = _mm_cmpeq_epi8(d0, c); // byte-wise equality masks
+    __m128i c1 = _mm_cmpeq_epi8(d1, c);
+    __m128i c2 = _mm_cmpeq_epi8(d2, c);
+    __m128i c3 = _mm_cmpeq_epi8(d3, c);
+
+    __m128i m0 = _mm_and_si128(c0, c1);
+    __m128i m1 = _mm_and_si128(c2, c3);
+    __m128i m = _mm_and_si128(m0, m1); // all ones only if every byte matched
+
+    if (!_mm_testc_si128(m, _mm_set1_epi32(-1))) // testc yields 1 only when m has every bit set
+    {
+        return 0;
+    }
+#elif defined __ARM_NEON
+    int32x4_t d0 = vld1q_s32((int32_t*)src +  0); // four pixels per 128-bit load
+    int32x4_t d1 = vld1q_s32((int32_t*)src +  4);
+    int32x4_t d2 = vld1q_s32((int32_t*)src +  8);
+    int32x4_t d3 = vld1q_s32((int32_t*)src + 12);
+
+    int32x4_t c = vdupq_n_s32(d0[0]); // first pixel broadcast to all four lanes
+
+    int32x4_t c0 = vreinterpretq_s32_u32(vceqq_s32(d0, c)); // 32-bit lane equality masks
+    int32x4_t c1 = vreinterpretq_s32_u32(vceqq_s32(d1, c));
+    int32x4_t c2 = vreinterpretq_s32_u32(vceqq_s32(d2, c));
+    int32x4_t c3 = vreinterpretq_s32_u32(vceqq_s32(d3, c));
+
+    int32x4_t m0 = vandq_s32(c0, c1);
+    int32x4_t m1 = vandq_s32(c2, c3);
+    int64x2_t m = vreinterpretq_s64_s32(vandq_s32(m0, m1)); // viewed as two 64-bit halves for the test below
+
+    if (m[0] != -1 || m[1] != -1) // any cleared bit means some pixel differed
+    {
+        return 0;
+    }
+#else
+    const uint8_t* ptr = src + 4; // start at pixel 1
+    for( int i=1; i<16; i++ )
+    {
+        if( memcmp( src, ptr, 4 ) != 0 )
+        {
+            return 0;
+        }
+        ptr += 4;
+    }
+#endif
+    return 0x02000000 | // NOTE(review): bit 25 matches what EncodeAverages sets for idx == 2 (idx << 24) - confirm this is the intended mode flag
+        ( (unsigned int)( src[0] & 0xF8 ) << 16 ) | // top 5 bits of byte 0 -> bits 19..23
+        ( (unsigned int)( src[1] & 0xF8 ) << 8 ) | // top 5 bits of byte 1 -> bits 11..15
+        ( (unsigned int)( src[2] & 0xF8 ) ); // top 5 bits of byte 2 -> bits 3..7
+}
+
+static etcpak_force_inline void PrepareAverages( v4i a[8], const uint8_t* src, unsigned int err[4] )
+{
+    // Runs Average and ProcessAverages on a[], then adds CalcError results onto err[] (err is accumulated, not reset).
+    Average( src, a );
+    ProcessAverages( a );
+    unsigned int sums[4][4];
+    CalcErrorBlock( src, sums );
+
+    for( int pair = 0; pair < 2; ++pair )
+    {
+        err[pair] += CalcError( sums[pair * 2], a[pair * 2] ) + CalcError( sums[pair * 2 + 1], a[pair * 2 + 1] );
+        err[2 + pair] += CalcError( sums[pair * 2], a[pair * 2 + 4] ) + CalcError( sums[pair * 2 + 1], a[pair * 2 + 5] );
+    }
+}
+
+static etcpak_force_inline void FindBestFit( uint64_t terr[2][8], uint16_t tsel[16][8], v4i a[8], const uint32_t* id, const uint8_t* data ) // Per pixel: best selector for each of 8 tables -> tsel[i][0..7]; squared error added to terr[id[i] % 2][0..7].
+{ // data: 16 pixels of 4 bytes each, read as b, g, r plus one skipped byte. terr is accumulated, never reset here.
+    for( size_t i=0; i<16; i++ )
+    {
+        uint16_t* sel = tsel[i];
+        unsigned int bid = id[i]; // selects which colour a[bid] this pixel is compared against
+        uint64_t* ter = terr[bid%2];
+
+        uint8_t b = *data++;
+        uint8_t g = *data++;
+        uint8_t r = *data++;
+        data++; // fourth byte of the pixel is ignored
+
+        int dr = a[bid][0] - r;
+        int dg = a[bid][1] - g;
+        int db = a[bid][2] - b;
+
+#ifdef __SSE4_1__
+        // Reference implementation
+
+        __m128i pix = _mm_set1_epi32(dr * 77 + dg * 151 + db * 28); // weighted channel difference; the weights sum to 256
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        __m128i error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[0])); // candidates for selectors 0..3: +T[0], +T[1], -T[0], -T[1]
+        __m128i error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[1]));
+        __m128i error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[0]));
+        __m128i error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[1]));
+
+        __m128i index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1)); // 0 or 1
+        __m128i minError0 = _mm_min_epi32(error0, error1);
+
+        __m128i index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2)); // 2 - (0 or -1) = 2 or 3
+        __m128i minError1 = _mm_min_epi32(error2, error3);
+
+        __m128i minIndex0 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0)); // selectors for the first four lanes
+        __m128i minError = _mm_min_epi32(minError0, minError1);
+
+        // Squaring the minimum error to produce correct values when adding
+        __m128i minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0)); // lanes 0 and 1 placed where _mm_mul_epi32 reads them
+        __m128i squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow); // two 64-bit squares
+        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0)); // accumulate into ter[0..1]
+        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
+        __m128i minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2)); // lanes 2 and 3
+        __m128i squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
+        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1)); // accumulate into ter[2..3]
+        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
+
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[2])); // same procedure for the remaining four lanes
+        error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[3]));
+        error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[2]));
+        error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[3]));
+
+        index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
+        minError0 = _mm_min_epi32(error0, error1);
+
+        index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
+        minError1 = _mm_min_epi32(error2, error3);
+
+        __m128i minIndex1 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
+        minError = _mm_min_epi32(minError0, minError1);
+
+        // Squaring the minimum error to produce correct values when adding
+        minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
+        squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
+        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 2)); // accumulate into ter[4..5]
+        _mm_storeu_si128(((__m128i*)ter) + 2, squareErrorLow);
+        minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
+        squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
+        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 3)); // accumulate into ter[6..7]
+        _mm_storeu_si128(((__m128i*)ter) + 3, squareErrorHigh);
+        __m128i minIndex = _mm_packs_epi32(minIndex0, minIndex1); // narrow the eight 32-bit selectors to 16 bits
+        _mm_storeu_si128((__m128i*)sel, minIndex); // writes sel[0..7]
+#elif defined __ARM_NEON
+        int32x4_t pix = vdupq_n_s32(dr * 77 + dg * 151 + db * 28); // weighted channel difference; the weights sum to 256
+
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        uint32x4_t error0 = vreinterpretq_u32_s32(vabsq_s32(vaddq_s32(pix, g_table256_NEON[0])));
+        uint32x4_t error1 = vreinterpretq_u32_s32(vabsq_s32(vaddq_s32(pix, g_table256_NEON[1])));
+        uint32x4_t error2 = vreinterpretq_u32_s32(vabsq_s32(vsubq_s32(pix, g_table256_NEON[0])));
+        uint32x4_t error3 = vreinterpretq_u32_s32(vabsq_s32(vsubq_s32(pix, g_table256_NEON[1])));
+
+        uint32x4_t index0 = vandq_u32(vcltq_u32(error1, error0), vdupq_n_u32(1)); // 0 or 1
+        uint32x4_t minError0 = vminq_u32(error0, error1);
+
+        uint32x4_t index1 = vreinterpretq_u32_s32(vsubq_s32(vdupq_n_s32(2), vreinterpretq_s32_u32(vcltq_u32(error3, error2)))); // 2 - (0 or -1) = 2 or 3
+        uint32x4_t minError1 = vminq_u32(error2, error3);
+
+        uint32x4_t blendMask = vcltq_u32(minError1, minError0);
+        uint32x4_t minIndex0 = vorrq_u32(vbicq_u32(index0, blendMask), vandq_u32(index1, blendMask)); // mask ? index1 : index0
+        uint32x4_t minError = vminq_u32(minError0, minError1);
+
+        // Squaring the minimum error to produce correct values when adding
+        uint32x4_t squareErrorLow = vmulq_u32(minError, minError); // low 32 bits of each square
+        uint32x4_t squareErrorHigh = vshrq_n_u32(vreinterpretq_u32_s32(vqdmulhq_s32(vreinterpretq_s32_u32(minError), vreinterpretq_s32_u32(minError))), 1); // high 32 bits: doubling high-half multiply, then >> 1
+        uint32x4x2_t squareErrorZip = vzipq_u32(squareErrorLow, squareErrorHigh); // interleave low/high words into 64-bit values
+        uint64x2x2_t squareError = { vreinterpretq_u64_u32(squareErrorZip.val[0]), vreinterpretq_u64_u32(squareErrorZip.val[1]) };
+        squareError.val[0] = vaddq_u64(squareError.val[0], vld1q_u64(ter + 0)); // accumulate into ter[0..1]
+        squareError.val[1] = vaddq_u64(squareError.val[1], vld1q_u64(ter + 2)); // accumulate into ter[2..3]
+        vst1q_u64(ter + 0, squareError.val[0]);
+        vst1q_u64(ter + 2, squareError.val[1]);
+
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        error0 = vreinterpretq_u32_s32( vabsq_s32(vaddq_s32(pix, g_table256_NEON[2]))); // same procedure for the remaining four lanes
+        error1 = vreinterpretq_u32_s32( vabsq_s32(vaddq_s32(pix, g_table256_NEON[3])));
+        error2 = vreinterpretq_u32_s32( vabsq_s32(vsubq_s32(pix, g_table256_NEON[2])));
+        error3 = vreinterpretq_u32_s32( vabsq_s32(vsubq_s32(pix, g_table256_NEON[3])));
+
+        index0 = vandq_u32(vcltq_u32(error1, error0), vdupq_n_u32(1));
+        minError0 = vminq_u32(error0, error1);
+
+        index1 = vreinterpretq_u32_s32( vsubq_s32(vdupq_n_s32(2), vreinterpretq_s32_u32(vcltq_u32(error3, error2))) );
+        minError1 = vminq_u32(error2, error3);
+
+        blendMask = vcltq_u32(minError1, minError0);
+        uint32x4_t minIndex1 = vorrq_u32(vbicq_u32(index0, blendMask), vandq_u32(index1, blendMask));
+        minError = vminq_u32(minError0, minError1);
+
+        // Squaring the minimum error to produce correct values when adding
+        squareErrorLow = vmulq_u32(minError, minError);
+        squareErrorHigh = vshrq_n_u32(vreinterpretq_u32_s32( vqdmulhq_s32(vreinterpretq_s32_u32(minError), vreinterpretq_s32_u32(minError)) ), 1 );
+        squareErrorZip = vzipq_u32(squareErrorLow, squareErrorHigh);
+        squareError.val[0] = vaddq_u64(vreinterpretq_u64_u32( squareErrorZip.val[0] ), vld1q_u64(ter + 4)); // accumulate into ter[4..5]
+        squareError.val[1] = vaddq_u64(vreinterpretq_u64_u32( squareErrorZip.val[1] ), vld1q_u64(ter + 6)); // accumulate into ter[6..7]
+        vst1q_u64(ter + 4, squareError.val[0]);
+        vst1q_u64(ter + 6, squareError.val[1]);
+
+        uint16x8_t minIndex = vcombine_u16(vqmovn_u32(minIndex0), vqmovn_u32(minIndex1)); // narrow the eight selectors to 16 bits
+        vst1q_u16(sel, minIndex); // writes sel[0..7]
+#else
+        int pix = dr * 77 + dg * 151 + db * 28; // weighted channel difference; the weights sum to 256
+
+        for( int t=0; t<8; t++ ) // one pass per table
+        {
+            const int64_t* tab = g_table256[t];
+            unsigned int idx = 0;
+            uint64_t err = sq( tab[0] + pix );
+            for( int j=1; j<4; j++ ) // strict '<' keeps the lowest index on ties
+            {
+                uint64_t local = sq( tab[j] + pix );
+                if( local < err )
+                {
+                    err = local;
+                    idx = j;
+                }
+            }
+            *sel++ = idx; // tsel[i][t]
+            *ter++ += err; // terr[bid % 2][t]
+        }
+#endif
+    }
+}
+
+#if defined __SSE4_1__ || defined __ARM_NEON
+// Non-reference implementation, but faster. Produces same results as the AVX2 version
+static etcpak_force_inline void FindBestFit( uint32_t terr[2][8], uint16_t tsel[16][8], v4i a[8], const uint32_t* id, const uint8_t* data ) // 16-bit-lane overload: 32-bit error sums, all 8 tables handled in one vector.
+{ // data: 16 pixels of 4 bytes each, read as b, g, r plus one skipped byte. terr is accumulated, never reset here.
+    for( size_t i=0; i<16; i++ )
+    {
+        uint16_t* sel = tsel[i];
+        unsigned int bid = id[i]; // selects which colour a[bid] this pixel is compared against
+        uint32_t* ter = terr[bid%2];
+
+        uint8_t b = *data++;
+        uint8_t g = *data++;
+        uint8_t r = *data++;
+        data++; // fourth byte of the pixel is ignored
+
+        int dr = a[bid][0] - r;
+        int dg = a[bid][1] - g;
+        int db = a[bid][2] - b;
+
+#ifdef __SSE4_1__
+        // The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
+        // This produces slightly different results, but is significantly faster
+        __m128i pixel = _mm_set1_epi16(dr * 38 + dg * 76 + db * 14);
+        __m128i pix = _mm_abs_epi16(pixel);
+
+        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
+        // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
+        __m128i error0 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[0]));
+        __m128i error1 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[1]));
+
+        __m128i index = _mm_and_si128(_mm_cmplt_epi16(error1, error0), _mm_set1_epi16(1)); // bit 0 of the selector: 0 or 1
+        __m128i minError = _mm_min_epi16(error0, error1);
+
+        // Exploiting symmetry of the selector table and use the sign bit
+        // This produces slightly different results, but is needed to produce same results as AVX2 implementation
+        __m128i indexBit = _mm_andnot_si128(_mm_srli_epi16(pixel, 15), _mm_set1_epi8(-1)); // ~(sign bit as 0/1): 0xFFFF if pixel >= 0, else 0xFFFE
+        __m128i minIndex = _mm_or_si128(index, _mm_add_epi16(indexBit, indexBit)); // bit 1 set iff pixel >= 0; bits 2..15 also end up set - presumably masked by the consumer, confirm
+
+        // Squaring the minimum error to produce correct values when adding
+        __m128i squareErrorLo = _mm_mullo_epi16(minError, minError); // low 16 bits of each square
+        __m128i squareErrorHi = _mm_mulhi_epi16(minError, minError); // high 16 bits of each square
+
+        __m128i squareErrorLow = _mm_unpacklo_epi16(squareErrorLo, squareErrorHi); // 32-bit squares for lanes 0..3
+        __m128i squareErrorHigh = _mm_unpackhi_epi16(squareErrorLo, squareErrorHi); // 32-bit squares for lanes 4..7
+
+        squareErrorLow = _mm_add_epi32(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0)); // accumulate into ter[0..3]
+        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
+        squareErrorHigh = _mm_add_epi32(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1)); // accumulate into ter[4..7]
+        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
+
+        _mm_storeu_si128((__m128i*)sel, minIndex); // writes sel[0..7]
+#elif defined __ARM_NEON
+        int16x8_t pixel = vdupq_n_s16( dr * 38 + dg * 76 + db * 14 ); // same halved weights as the SSE4.1 path
+        int16x8_t pix = vabsq_s16( pixel );
+
+        int16x8_t error0 = vabsq_s16( vsubq_s16( pix, g_table128_NEON[0] ) );
+        int16x8_t error1 = vabsq_s16( vsubq_s16( pix, g_table128_NEON[1] ) );
+
+        int16x8_t index = vandq_s16( vreinterpretq_s16_u16( vcltq_s16( error1, error0 ) ), vdupq_n_s16( 1 ) ); // bit 0 of the selector: 0 or 1
+        int16x8_t minError = vminq_s16( error0, error1 );
+
+        int16x8_t indexBit = vandq_s16( vmvnq_s16( vshrq_n_s16( pixel, 15 ) ), vdupq_n_s16( -1 ) ); // ~(arithmetic sign fill): 0xFFFF if pixel >= 0, else 0
+        int16x8_t minIndex = vorrq_s16( index, vaddq_s16( indexBit, indexBit ) ); // bit 1 set iff pixel >= 0; low two bits match the SSE4.1 path, upper bits differ
+
+        int16x4_t minErrorLow = vget_low_s16( minError );
+        int16x4_t minErrorHigh = vget_high_s16( minError );
+
+        int32x4_t squareErrorLow = vmull_s16( minErrorLow, minErrorLow ); // widening multiply: 32-bit squares for lanes 0..3
+        int32x4_t squareErrorHigh = vmull_s16( minErrorHigh, minErrorHigh ); // 32-bit squares for lanes 4..7
+
+        int32x4_t squareErrorSumLow = vaddq_s32( squareErrorLow, vld1q_s32( (int32_t*)ter ) ); // accumulate into ter[0..3]
+        int32x4_t squareErrorSumHigh = vaddq_s32( squareErrorHigh, vld1q_s32( (int32_t*)ter + 4 ) ); // accumulate into ter[4..7]
+
+        vst1q_s32( (int32_t*)ter, squareErrorSumLow );
+        vst1q_s32( (int32_t*)ter + 4, squareErrorSumHigh );
+
+        vst1q_s16( (int16_t*)sel, minIndex ); // writes sel[0..7]
+#endif
+    }
+}
+#endif
+
+static etcpak_force_inline uint8_t convert6(float f)
+{
+    const int half = (std::max(0, std::min(1023, static_cast<int>(f))) - 15) >> 1; // truncate to int, clamp to [0, 1023], then (x - 15) >> 1
+    return static_cast<uint8_t>((half + 11 - ((half + 11) >> 7) - ((half + 4) >> 7)) >> 3); // NOTE(review): presumably the rounded 6-bit level - confirm
+}
+
+static etcpak_force_inline uint8_t convert7(float f)
+{
+    const int half = (std::max(0, std::min(1023, static_cast<int>(f))) - 15) >> 1; // truncate to int, clamp to [0, 1023], then (x - 15) >> 1
+    return static_cast<uint8_t>((half + 9 - ((half + 9) >> 8) - ((half + 6) >> 8)) >> 2); // NOTE(review): presumably the rounded 7-bit level - confirm
+}
+
+static etcpak_force_inline std::pair<uint64_t, uint64_t> Planar(const uint8_t* src) // Scalar planar fit of a 4x4 block: returns { packed 64-bit value, summed weighted squared error }.
+{ // src: 16 pixels of 4 bytes each; bytes 0/1/2 of a pixel are read as b/g/r, byte 3 is never read.
+    int32_t r = 0;
+    int32_t g = 0;
+    int32_t b = 0;
+
+    for (int i = 0; i < 16; ++i) // per-channel sums over all 16 pixels
+    {
+        b += src[i * 4 + 0];
+        g += src[i * 4 + 1];
+        r += src[i * 4 + 2];
+    }
+
+    int32_t difRyz = 0;
+    int32_t difGyz = 0;
+    int32_t difByz = 0;
+    int32_t difRxz = 0;
+    int32_t difGxz = 0;
+    int32_t difBxz = 0;
+
+    const int32_t scaling[] = { -255, -85, 85, 255 }; // weights for positions 0..3 along one axis
+
+    for (int i = 0; i < 16; ++i)
+    {
+        int32_t difB = (static_cast<int>(src[i * 4 + 0]) << 4) - b; // 16 * pixel - sum, i.e. 16 * (pixel - mean)
+        int32_t difG = (static_cast<int>(src[i * 4 + 1]) << 4) - g;
+        int32_t difR = (static_cast<int>(src[i * 4 + 2]) << 4) - r;
+
+        difRyz += difR * scaling[i % 4]; // weighted by the pixel's i % 4 position
+        difGyz += difG * scaling[i % 4];
+        difByz += difB * scaling[i % 4];
+
+        difRxz += difR * scaling[i / 4]; // weighted by the pixel's i / 4 position
+        difGxz += difG * scaling[i / 4];
+        difBxz += difB * scaling[i / 4];
+    }
+
+    const float scale = -4.0f / ((255 * 255 * 8.0f + 85 * 85 * 8.0f) * 16.0f); // shared normalisation factor for all six weighted sums; note it is negative
+
+    float aR = difRxz * scale;
+    float aG = difGxz * scale;
+    float aB = difBxz * scale;
+
+    float bR = difRyz * scale;
+    float bG = difGyz * scale;
+    float bB = difByz * scale;
+
+    float dR = r * (4.0f / 16.0f); // sum * 4 / 16 = 4 * mean
+    float dG = g * (4.0f / 16.0f);
+    float dB = b * (4.0f / 16.0f);
+
+    // calculating the three colors RGBO, RGBH, and RGBV.  RGB = df - af * x - bf * y;
+    float cofR = std::fma(aR,  255.0f, std::fma(bR,  255.0f, dR)); // O: a * 255 + b * 255 + d
+    float cofG = std::fma(aG,  255.0f, std::fma(bG,  255.0f, dG));
+    float cofB = std::fma(aB,  255.0f, std::fma(bB,  255.0f, dB));
+    float chfR = std::fma(aR, -425.0f, std::fma(bR,  255.0f, dR)); // H: a * -425 + b * 255 + d
+    float chfG = std::fma(aG, -425.0f, std::fma(bG,  255.0f, dG));
+    float chfB = std::fma(aB, -425.0f, std::fma(bB,  255.0f, dB));
+    float cvfR = std::fma(aR,  255.0f, std::fma(bR, -425.0f, dR)); // V: a * 255 + b * -425 + d
+    float cvfG = std::fma(aG,  255.0f, std::fma(bG, -425.0f, dG));
+    float cvfB = std::fma(aB,  255.0f, std::fma(bB, -425.0f, dB));
+
+    // convert to r6g7b6
+    int32_t coR = convert6(cofR);
+    int32_t coG = convert7(cofG);
+    int32_t coB = convert6(cofB);
+    int32_t chR = convert6(chfR);
+    int32_t chG = convert7(chfG);
+    int32_t chB = convert6(chfB);
+    int32_t cvR = convert6(cvfR);
+    int32_t cvG = convert7(cvfG);
+    int32_t cvB = convert6(cvfB);
+
+    // Error calculation
+    auto ro0 = coR;
+    auto go0 = coG;
+    auto bo0 = coB;
+    auto ro1 = (ro0 >> 4) | (ro0 << 2); // expand 6 bits to 8 by replicating the top 2 bits
+    auto go1 = (go0 >> 6) | (go0 << 1); // expand 7 bits to 8 by replicating the top bit
+    auto bo1 = (bo0 >> 4) | (bo0 << 2);
+    auto ro2 = (ro1 << 2) + 2; // * 4 + 2: base term and rounding bias for the >> 2 in the loop below
+    auto go2 = (go1 << 2) + 2;
+    auto bo2 = (bo1 << 2) + 2;
+
+    auto rh0 = chR;
+    auto gh0 = chG;
+    auto bh0 = chB;
+    auto rh1 = (rh0 >> 4) | (rh0 << 2);
+    auto gh1 = (gh0 >> 6) | (gh0 << 1);
+    auto bh1 = (bh0 >> 4) | (bh0 << 2);
+
+    auto rh2 = rh1 - ro1; // H - O: step applied per unit of i / 4 below
+    auto gh2 = gh1 - go1;
+    auto bh2 = bh1 - bo1;
+
+    auto rv0 = cvR;
+    auto gv0 = cvG;
+    auto bv0 = cvB;
+    auto rv1 = (rv0 >> 4) | (rv0 << 2);
+    auto gv1 = (gv0 >> 6) | (gv0 << 1);
+    auto bv1 = (bv0 >> 4) | (bv0 << 2);
+
+    auto rv2 = rv1 - ro1; // V - O: step applied per unit of i % 4 below
+    auto gv2 = gv1 - go1;
+    auto bv2 = bv1 - bo1;
+
+    uint64_t error = 0;
+
+    for (int i = 0; i < 16; ++i) // rebuild each pixel from the quantized O/H/V colours and compare with the source
+    {
+        int32_t cR = clampu8((rh2 * (i / 4) + rv2 * (i % 4) + ro2) >> 2); // clampu8 is defined elsewhere; presumably clamps to 0..255 - confirm
+        int32_t cG = clampu8((gh2 * (i / 4) + gv2 * (i % 4) + go2) >> 2);
+        int32_t cB = clampu8((bh2 * (i / 4) + bv2 * (i % 4) + bo2) >> 2);
+
+        int32_t difB = static_cast<int>(src[i * 4 + 0]) - cB;
+        int32_t difG = static_cast<int>(src[i * 4 + 1]) - cG;
+        int32_t difR = static_cast<int>(src[i * 4 + 2]) - cR;
+
+        int32_t dif = difR * 38 + difG * 76 + difB * 14; // weighted channel difference
+
+        error += dif * dif; // the product is evaluated in 32-bit int before being added to the 64-bit total
+    }
+
+    /**/
+    uint32_t rgbv = cvB | (cvG << 6) | (cvR << 13); // V colour: B in bits 0..5, G in 6..12, R in 13..18
+    uint32_t rgbh = chB | (chG << 6) | (chR << 13); // H colour, same field layout
+    uint32_t hi = rgbv | ((rgbh & 0x1FFF) << 19); // V plus the low 13 bits of H (B and G) in bits 19..31
+    uint32_t lo = (chR & 0x1) | 0x2 | ((chR << 1) & 0x7C); // H red split around a constant 1 at bit 1 - NOTE(review): layout presumably follows the ETC2 planar block format, confirm
+    lo |= ((coB & 0x07) <<  7) | ((coB & 0x18) <<  8) | ((coB & 0x20) << 11); // O blue scattered over bits 7..9, 11..12 and 16
+    lo |= ((coG & 0x3F) << 17) | ((coG & 0x40) << 18); // O green in bits 17..22 and 24
+    lo |= coR << 25; // O red from bit 25 upward
+
+    const auto idx = (coR & 0x20) | ((coG & 0x20) >> 1) | ((coB & 0x1E) >> 1); // 6-bit index: coR bit 5, coG bit 5, coB bits 1..4
+
+    lo |= g_flags[idx]; // g_flags is defined elsewhere; it supplies the bits left unset above
+
+    uint64_t result = static_cast<uint32_t>(_bswap(lo)); // _bswap is defined elsewhere; presumably a 32-bit byte swap - confirm
+    result |= static_cast<uint64_t>(static_cast<uint32_t>(_bswap(hi))) << 32;
+
+    return std::make_pair(result, error);
+}
+
+#ifdef __ARM_NEON
+
+static etcpak_force_inline int32x2_t Planar_NEON_DifXZ( int16x8_t dif_lo, int16x8_t dif_hi ) // Weighted sum of 16 lanes with one weight per group of four; the total is returned in both lanes.
+{ // Groups of four lanes get -255, -85, 85, 255 in order, matching scaling[i / 4] in the scalar Planar.
+    int32x4_t dif0 = vmull_n_s16( vget_low_s16( dif_lo ), -255 ); // lanes 0..3
+    int32x4_t dif1 = vmull_n_s16( vget_high_s16( dif_lo ), -85 ); // lanes 4..7
+    int32x4_t dif2 = vmull_n_s16( vget_low_s16( dif_hi ), 85 ); // lanes 8..11
+    int32x4_t dif3 = vmull_n_s16( vget_high_s16( dif_hi ), 255 ); // lanes 12..15
+    int32x4_t dif4 = vaddq_s32( vaddq_s32( dif0, dif1 ), vaddq_s32( dif2, dif3 ) );
+
+#ifndef __aarch64__ // 32-bit ARM: horizontal add via two pairwise adds
+    int32x2_t dif5 = vpadd_s32( vget_low_s32( dif4 ), vget_high_s32( dif4 ) );
+    return vpadd_s32( dif5, dif5 ); // both lanes hold the full sum
+#else
+    return vdup_n_s32( vaddvq_s32( dif4 ) ); // across-vector add, then broadcast
+#endif
+}
+
+static etcpak_force_inline int32x2_t Planar_NEON_DifYZ( int16x8_t dif_lo, int16x8_t dif_hi ) // Weighted sum of 16 lanes with weights cycling every four lanes; the total is returned in both lanes.
+{ // Each lane k is weighted by scaling[k % 4], matching scaling[i % 4] in the scalar Planar.
+    int16x4_t scaling = { -255, -85, 85, 255 };
+    int32x4_t dif0 = vmull_s16( vget_low_s16( dif_lo ), scaling ); // lane-wise widening multiply
+    int32x4_t dif1 = vmull_s16( vget_high_s16( dif_lo ), scaling );
+    int32x4_t dif2 = vmull_s16( vget_low_s16( dif_hi ), scaling );
+    int32x4_t dif3 = vmull_s16( vget_high_s16( dif_hi ), scaling );
+    int32x4_t dif4 = vaddq_s32( vaddq_s32( dif0, dif1 ), vaddq_s32( dif2, dif3 ) );
+
+#ifndef __aarch64__ // 32-bit ARM: horizontal add via two pairwise adds
+    int32x2_t dif5 = vpadd_s32( vget_low_s32( dif4 ), vget_high_s32( dif4 ) );
+    return vpadd_s32( dif5, dif5 ); // both lanes hold the full sum
+#else
+    return vdup_n_s32( vaddvq_s32( dif4 ) ); // across-vector add, then broadcast
+#endif
+}
+
+static etcpak_force_inline int16x8_t Planar_NEON_SumWide( uint8x16_t src ) // Sums all 16 bytes of src and returns the total replicated in all eight 16-bit lanes.
+{ // The total is at most 16 * 255 = 4080, so it fits a signed 16-bit lane.
+    uint16x8_t accu8 = vpaddlq_u8( src ); // pairwise widening add: 16 bytes -> 8 sums
+#ifndef __aarch64__ // 32-bit ARM: reduce with three pairwise adds
+    uint16x4_t accu4 = vpadd_u16( vget_low_u16( accu8 ), vget_high_u16( accu8 ) );
+    uint16x4_t accu2 = vpadd_u16( accu4, accu4 );
+    uint16x4_t accu1 = vpadd_u16( accu2, accu2 ); // every lane now holds the total
+    return vreinterpretq_s16_u16( vcombine_u16( accu1, accu1 ) );
+#else // aarch64: across-vector add, then broadcast
+    return vdupq_n_s16( vaddvq_u16( accu8 ) );
+#endif
+}
+
+static etcpak_force_inline int16x8_t convert6_NEON( int32x4_t lo, int32x4_t hi ) // Eight-lane integer counterpart of convert6: lo fills lanes 0..3, hi fills lanes 4..7.
+{ // Inputs are already integers; the formula below mirrors convert6 term by term.
+    uint16x8_t x = vcombine_u16( vqmovun_s32( lo ), vqmovun_s32( hi ) ); // saturating narrow to unsigned 16 bits (negatives become 0)
+    int16x8_t i = vreinterpretq_s16_u16( vshrq_n_u16( vqshlq_n_u16( x, 6 ), 6) ); // clamp 0-1023
+    i = vhsubq_s16( i, vdupq_n_s16( 15 ) ); // halving subtract: (i - 15) >> 1
+
+    int16x8_t ip11 = vaddq_s16( i, vdupq_n_s16( 11 ) );
+    int16x8_t ip4 = vaddq_s16( i, vdupq_n_s16( 4 ) );
+
+    return vshrq_n_s16( vsubq_s16( vsubq_s16( ip11, vshrq_n_s16( ip11, 7 ) ), vshrq_n_s16( ip4, 7) ), 3 ); // (i + 11 - ((i + 11) >> 7) - ((i + 4) >> 7)) >> 3
+}
+
+static etcpak_force_inline int16x4_t convert7_NEON( int32x4_t x ) // Four-lane integer counterpart of convert7; the formula below mirrors it term by term.
+{
+    int16x4_t i = vreinterpret_s16_u16( vshr_n_u16( vqshl_n_u16( vqmovun_s32( x ), 6 ), 6 ) ); // clamp 0-1023
+    i = vhsub_s16( i, vdup_n_s16( 15 ) ); // halving subtract: (i - 15) >> 1
+
+    int16x4_t p9 = vadd_s16( i, vdup_n_s16( 9 ) );
+    int16x4_t p6 = vadd_s16( i, vdup_n_s16( 6 ) );
+    return vshr_n_s16( vsub_s16( vsub_s16( p9, vshr_n_s16( p9, 8 ) ), vshr_n_s16( p6, 8 ) ), 2 ); // (i + 9 - ((i + 9) >> 8) - ((i + 6) >> 8)) >> 2
+}
+
+static etcpak_force_inline std::pair<uint64_t, uint64_t> Planar_NEON( const uint8_t* src )
+{
+    uint8x16x4_t srcBlock = vld4q_u8( src );
+
+    int16x8_t bSumWide = Planar_NEON_SumWide( srcBlock.val[0] );
+    int16x8_t gSumWide = Planar_NEON_SumWide( srcBlock.val[1] );
+    int16x8_t rSumWide = Planar_NEON_SumWide( srcBlock.val[2] );
+
+    int16x8_t dif_R_lo = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_low_u8( srcBlock.val[2] ), 4) ), rSumWide );
+    int16x8_t dif_R_hi = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_high_u8( srcBlock.val[2] ), 4) ), rSumWide );
+
+    int16x8_t dif_G_lo = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_low_u8( srcBlock.val[1] ), 4 ) ), gSumWide );
+    int16x8_t dif_G_hi = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_high_u8( srcBlock.val[1] ), 4 ) ), gSumWide );
+
+    int16x8_t dif_B_lo = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_low_u8( srcBlock.val[0] ), 4) ), bSumWide );
+    int16x8_t dif_B_hi = vsubq_s16( vreinterpretq_s16_u16( vshll_n_u8( vget_high_u8( srcBlock.val[0] ), 4) ), bSumWide );
+
+    int32x2x2_t dif_xz_z = vzip_s32( vzip_s32( Planar_NEON_DifXZ( dif_B_lo, dif_B_hi ), Planar_NEON_DifXZ( dif_R_lo, dif_R_hi ) ).val[0], Planar_NEON_DifXZ( dif_G_lo, dif_G_hi ) );
+    int32x4_t dif_xz = vcombine_s32( dif_xz_z.val[0], dif_xz_z.val[1] );
+    int32x2x2_t dif_yz_z = vzip_s32( vzip_s32( Planar_NEON_DifYZ( dif_B_lo, dif_B_hi ), Planar_NEON_DifYZ( dif_R_lo, dif_R_hi ) ).val[0], Planar_NEON_DifYZ( dif_G_lo, dif_G_hi ) );
+    int32x4_t dif_yz = vcombine_s32( dif_yz_z.val[0], dif_yz_z.val[1] );
+
+    const float fscale = -4.0f / ( (255 * 255 * 8.0f + 85 * 85 * 8.0f ) * 16.0f );
+    float32x4_t fa = vmulq_n_f32( vcvtq_f32_s32( dif_xz ), fscale );
+    float32x4_t fb = vmulq_n_f32( vcvtq_f32_s32( dif_yz ), fscale );
+    int16x4_t bgrgSum = vzip_s16( vzip_s16( vget_low_s16( bSumWide ), vget_low_s16( rSumWide ) ).val[0], vget_low_s16( gSumWide ) ).val[0];
+    float32x4_t fd = vmulq_n_f32( vcvtq_f32_s32( vmovl_s16( bgrgSum ) ), 4.0f / 16.0f);
+
+    float32x4_t cof = vmlaq_n_f32( vmlaq_n_f32( fd, fb, 255.0f ), fa, 255.0f );
+    float32x4_t chf = vmlaq_n_f32( vmlaq_n_f32( fd, fb, 255.0f ), fa, -425.0f );
+    float32x4_t cvf = vmlaq_n_f32( vmlaq_n_f32( fd, fb, -425.0f ), fa, 255.0f );
+
+    int32x4_t coi = vcvtq_s32_f32( cof );
+    int32x4_t chi = vcvtq_s32_f32( chf );
+    int32x4_t cvi = vcvtq_s32_f32( cvf );
+
+    int32x4x2_t tr_hv = vtrnq_s32( chi, cvi );
+    int32x4x2_t tr_o = vtrnq_s32( coi, coi );
+
+    int16x8_t c_hvoo_br_6 = convert6_NEON( tr_hv.val[0], tr_o.val[0] );
+    int16x4_t c_hvox_g_7 = convert7_NEON( vcombine_s32( vget_low_s32( tr_hv.val[1] ), vget_low_s32( tr_o.val[1] ) ) );
+    int16x8_t c_hvoo_br_8 = vorrq_s16( vshrq_n_s16( c_hvoo_br_6, 4 ), vshlq_n_s16( c_hvoo_br_6, 2 ) );
+    int16x4_t c_hvox_g_8 = vorr_s16( vshr_n_s16( c_hvox_g_7, 6 ), vshl_n_s16( c_hvox_g_7, 1 ) );
+
+    int16x4_t rec_gxbr_o = vext_s16( c_hvox_g_8, vget_high_s16( c_hvoo_br_8 ), 3 );
+
+    rec_gxbr_o = vadd_s16( vshl_n_s16( rec_gxbr_o, 2 ), vdup_n_s16( 2 ) );
+    int16x8_t rec_ro_wide = vdupq_lane_s16( rec_gxbr_o, 3 );
+    int16x8_t rec_go_wide = vdupq_lane_s16( rec_gxbr_o, 0 );
+    int16x8_t rec_bo_wide = vdupq_lane_s16( rec_gxbr_o, 1 );
+
+    int16x4_t br_hv2 = vsub_s16( vget_low_s16( c_hvoo_br_8 ), vget_high_s16( c_hvoo_br_8 ) );
+    int16x4_t gg_hv2 = vsub_s16( c_hvox_g_8, vdup_lane_s16( c_hvox_g_8, 2 ) );
+
+    int16x8_t scaleh_lo = { 0, 0, 0, 0, 1, 1, 1, 1 };
+    int16x8_t scaleh_hi = { 2, 2, 2, 2, 3, 3, 3, 3 };
+    int16x8_t scalev = { 0, 1, 2, 3, 0, 1, 2, 3 };
+
+    int16x8_t rec_r_1 = vmlaq_lane_s16( rec_ro_wide, scalev, br_hv2, 3 );
+    int16x8_t rec_r_lo = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_r_1, scaleh_lo, br_hv2, 2 ), 2 ) ) );
+    int16x8_t rec_r_hi = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_r_1, scaleh_hi, br_hv2, 2 ), 2 ) ) );
+
+    int16x8_t rec_b_1 = vmlaq_lane_s16( rec_bo_wide, scalev, br_hv2, 1 );
+    int16x8_t rec_b_lo = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_b_1, scaleh_lo, br_hv2, 0 ), 2 ) ) );
+    int16x8_t rec_b_hi = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_b_1, scaleh_hi, br_hv2, 0 ), 2 ) ) );
+
+    int16x8_t rec_g_1 = vmlaq_lane_s16( rec_go_wide, scalev, gg_hv2, 1 );
+    int16x8_t rec_g_lo = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_g_1, scaleh_lo, gg_hv2, 0 ), 2 ) ) );
+    int16x8_t rec_g_hi = vreinterpretq_s16_u16( vmovl_u8( vqshrun_n_s16( vmlaq_lane_s16( rec_g_1, scaleh_hi, gg_hv2, 0 ), 2 ) ) );
+
+    int16x8_t dif_r_lo = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_low_u8( srcBlock.val[2] ) ) ), rec_r_lo );
+    int16x8_t dif_r_hi = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_high_u8( srcBlock.val[2] ) ) ), rec_r_hi );
+
+    int16x8_t dif_g_lo = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_low_u8( srcBlock.val[1] ) ) ), rec_g_lo );
+    int16x8_t dif_g_hi = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_high_u8( srcBlock.val[1] ) ) ), rec_g_hi );
+
+    int16x8_t dif_b_lo = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_low_u8( srcBlock.val[0] ) ) ), rec_b_lo );
+    int16x8_t dif_b_hi = vsubq_s16( vreinterpretq_s16_u16( vmovl_u8( vget_high_u8( srcBlock.val[0] ) ) ), rec_b_hi );
+
+    int16x8_t dif_lo = vmlaq_n_s16( vmlaq_n_s16( vmulq_n_s16( dif_r_lo, 38 ), dif_g_lo, 76 ), dif_b_lo, 14 );
+    int16x8_t dif_hi = vmlaq_n_s16( vmlaq_n_s16( vmulq_n_s16( dif_r_hi, 38 ), dif_g_hi, 76 ), dif_b_hi, 14 );
+
+    int16x4_t tmpDif = vget_low_s16( dif_lo );
+    int32x4_t difsq_0 = vmull_s16( tmpDif, tmpDif );
+    tmpDif = vget_high_s16( dif_lo );
+    int32x4_t difsq_1 = vmull_s16( tmpDif, tmpDif );
+    tmpDif = vget_low_s16( dif_hi );
+    int32x4_t difsq_2 = vmull_s16( tmpDif, tmpDif );
+    tmpDif = vget_high_s16( dif_hi );
+    int32x4_t difsq_3 = vmull_s16( tmpDif, tmpDif );
+
+    uint32x4_t difsq_5 = vaddq_u32( vreinterpretq_u32_s32( difsq_0 ), vreinterpretq_u32_s32( difsq_1 ) );
+    uint32x4_t difsq_6 = vaddq_u32( vreinterpretq_u32_s32( difsq_2 ), vreinterpretq_u32_s32( difsq_3) );
+
+    uint64x2_t difsq_7 = vaddl_u32( vget_low_u32( difsq_5 ), vget_high_u32( difsq_5 ) );
+    uint64x2_t difsq_8 = vaddl_u32( vget_low_u32( difsq_6 ), vget_high_u32( difsq_6 ) );
+
+    uint64x2_t difsq_9 = vaddq_u64( difsq_7, difsq_8 );
+
+#ifdef __aarch64__
+    uint64_t error = vaddvq_u64( difsq_9 );
+#else
+    uint64_t error = vgetq_lane_u64( difsq_9, 0 ) + vgetq_lane_u64( difsq_9, 1 );
+#endif
+
+    int32_t coR = c_hvoo_br_6[6];
+    int32_t coG = c_hvox_g_7[2];
+    int32_t coB = c_hvoo_br_6[4];
+
+    int32_t chR = c_hvoo_br_6[2];
+    int32_t chG = c_hvox_g_7[0];
+    int32_t chB = c_hvoo_br_6[0];
+
+    int32_t cvR = c_hvoo_br_6[3];
+    int32_t cvG = c_hvox_g_7[1];
+    int32_t cvB = c_hvoo_br_6[1];
+
+    uint32_t rgbv = cvB | ( cvG << 6 ) | ( cvR << 13 );
+    uint32_t rgbh = chB | ( chG << 6 ) | ( chR << 13 );
+    uint32_t hi = rgbv | ( ( rgbh & 0x1FFF ) << 19 );
+    uint32_t lo = ( chR & 0x1 ) | 0x2 | ( ( chR << 1 ) & 0x7C );
+    lo |= ( ( coB & 0x07 ) << 7 ) | ( ( coB & 0x18 ) << 8 ) | ( ( coB & 0x20 ) << 11 );
+    lo |= ( ( coG & 0x3F) << 17) | ( (coG & 0x40 ) << 18 );
+    lo |= coR << 25;
+
+    const auto idx = ( coR & 0x20 ) | ( ( coG & 0x20 ) >> 1 ) | ( ( coB & 0x1E ) >> 1 );
+
+    lo |= g_flags[idx];
+
+    uint64_t result = static_cast<uint32_t>( _bswap(lo) );
+    result |= static_cast<uint64_t>( static_cast<uint32_t>( _bswap( hi ) ) ) << 32;
+
+    return std::make_pair( result, error );
+}
+
+#endif
+
+// Merges the chosen table indices and the 16 selector entries into d and returns the result of
+// FixByteOrder, unless the alternative `value` (whose error is `error`) is at least as good.
+template<class T, class S>
+static etcpak_force_inline uint64_t EncodeSelectors( uint64_t d, const T terr[2][8], const S tsel[16][8], const uint32_t* id, const uint64_t value, const uint64_t error)
+{
+    // Entry of each terr row that GetLeastError picks among its 8 values.
+    const size_t best0 = GetLeastError( terr[0], 8 );
+    const size_t best1 = GetLeastError( terr[1], 8 );
+
+    // The alternative wins ties: it is returned untouched when the summed error is not smaller.
+    if( ( terr[0][best0] + terr[1][best1] ) >= error ) return value;
+
+    const size_t best[2] = { best0, best1 };
+    d |= ( best0 << 26 ) | ( best1 << 29 );
+    for( int px = 0; px < 16; ++px )
+    {
+        // The parity of id[px] decides which row's winning index is looked up in tsel[px].
+        const uint64_t sel = tsel[px][best[id[px] % 2]];
+        d |= ( ( sel & 0x1 ) << ( px + 32 ) ) | ( ( sel & 0x2 ) << ( px + 47 ) );
+    }
+
+    return FixByteOrder( d );
+}
+
+}
+
+// Encodes the data at `src` into one 64-bit value. A non-zero CheckSolid* result is returned as
+// is; otherwise the PrepareAverages / EncodeAverages / FindBestFit / EncodeSelectors helpers run.
+static etcpak_force_inline uint64_t ProcessRGB( const uint8_t* src )
+{
+#ifdef __AVX2__
+    uint64_t d = CheckSolid_AVX2( src );
+    if( d != 0 ) return d;
+
+    alignas(32) v4i a[8];
+    const __m128i err0 = PrepareAverages_AVX2( a, src );
+
+    // Unsigned minimum across the four 32-bit lanes of err0: fold neighbours, then fold halves.
+    const __m128i neighbours = _mm_shuffle_epi32( err0, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    const __m128i pairMin = _mm_min_epu32( err0, neighbours );
+    const __m128i halves = _mm_shuffle_epi32( pairMin, _MM_SHUFFLE( 1, 0, 3, 2 ) );
+    const __m128i allMin = _mm_min_epu32( halves, pairMin );
+
+    // Lowest lane holding the minimum; movemask yields four bits per lane, hence the >> 2.
+    const uint32_t mask = _mm_movemask_epi8( _mm_cmpeq_epi32( allMin, err0 ) );
+    uint32_t idx = _bit_scan_forward( mask ) >> 2;
+
+    d |= EncodeAverages_AVX2( a, idx );
+
+    alignas(32) uint32_t terr[2][8] = {};
+    alignas(32) uint32_t tsel[8];
+
+    // idx 0 and 2 go through the 4x2 fitter, every other idx through the 2x4 one.
+    if( ( idx != 0 ) && ( idx != 2 ) )
+    {
+        FindBestFit_2x4_AVX2( terr, tsel, a, idx * 2, src );
+    }
+    else
+    {
+        FindBestFit_4x2_AVX2( terr, tsel, a, idx * 2, src );
+    }
+
+    return EncodeSelectors_AVX2( d, terr, tsel, ( idx % 2 ) == 1 );
+#else
+    uint64_t d = CheckSolid( src );
+    if( d != 0 ) return d;
+
+    // a[] and err[] are handed to PrepareAverages; idx is what GetLeastError returns for err[].
+    v4i a[8];
+    unsigned int err[4] = {};
+    PrepareAverages( a, src, err );
+    size_t idx = GetLeastError( err, 4 );
+    EncodeAverages( d, a, idx );
+
+    // terr is 32-bit on non-reference SSE4.1/NEON builds and 64-bit everywhere else.
+#if ( defined __SSE4_1__ || defined __ARM_NEON ) && !defined REFERENCE_IMPLEMENTATION
+    uint32_t terr[2][8] = {};
+#else
+    uint64_t terr[2][8] = {};
+#endif
+    uint16_t tsel[16][8];
+    auto id = g_id[idx];
+    FindBestFit( terr, tsel, a, id, src );
+    return FixByteOrder( EncodeSelectors( d, terr, tsel, id ) );
+#endif
+}
+
+// Encodes the data at `src` like the averages path does and also obtains a candidate from Planar*;
+// EncodeSelectors* gets both, and its scalar overload returns the planar value if its error is not larger.
+static etcpak_force_inline uint64_t ProcessRGB_ETC2( const uint8_t* src )
+{
+#ifdef __AVX2__
+    uint64_t d = CheckSolid_AVX2( src );
+    if( d != 0 ) return d;
+
+    auto planar = Planar_AVX2( src );
+
+    alignas(32) v4i a[8];
+    const __m128i err0 = PrepareAverages_AVX2( a, planar.sum4 );
+
+    // Unsigned minimum across the four 32-bit lanes of err0: fold neighbours, then fold halves.
+    const __m128i neighbours = _mm_shuffle_epi32( err0, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    const __m128i pairMin = _mm_min_epu32( err0, neighbours );
+    const __m128i halves = _mm_shuffle_epi32( pairMin, _MM_SHUFFLE( 1, 0, 3, 2 ) );
+    const __m128i allMin = _mm_min_epu32( halves, pairMin );
+
+    // Lowest lane holding the minimum; movemask yields four bits per lane, hence the >> 2.
+    const uint32_t mask = _mm_movemask_epi8( _mm_cmpeq_epi32( allMin, err0 ) );
+    size_t idx = _bit_scan_forward( mask ) >> 2;
+
+    d = EncodeAverages_AVX2( a, idx );
+
+    alignas(32) uint32_t terr[2][8] = {};
+    alignas(32) uint32_t tsel[8];
+
+    // idx 0 and 2 go through the 4x2 fitter, every other idx through the 2x4 one.
+    if( ( idx != 0 ) && ( idx != 2 ) )
+    {
+        FindBestFit_2x4_AVX2( terr, tsel, a, idx * 2, src );
+    }
+    else
+    {
+        FindBestFit_4x2_AVX2( terr, tsel, a, idx * 2, src );
+    }
+
+    return EncodeSelectors_AVX2( d, terr, tsel, ( idx % 2 ) == 1, planar.plane, planar.error );
+#else
+    uint64_t d = CheckSolid( src );
+    if( d != 0 ) return d;
+
+    // Planar candidate; its .first / .second are passed on to EncodeSelectors as value / error.
+#ifdef __ARM_NEON
+    auto planar = Planar_NEON( src );
+#else
+    auto planar = Planar( src );
+#endif
+
+    v4i a[8];
+    unsigned int err[4] = {};
+    PrepareAverages( a, src, err );
+    size_t idx = GetLeastError( err, 4 );
+    EncodeAverages( d, a, idx );
+
+    // terr is 32-bit on non-reference SSE4.1/NEON builds and 64-bit everywhere else.
+#if ( defined __SSE4_1__ || defined __ARM_NEON ) && !defined REFERENCE_IMPLEMENTATION
+    uint32_t terr[2][8] = {};
+#else
+    uint64_t terr[2][8] = {};
+#endif
+    uint16_t tsel[16][8];
+    auto id = g_id[idx];
+    FindBestFit( terr, tsel, a, id, src );
+    return EncodeSelectors( d, terr, tsel, id, planar.first, planar.second );
+#endif
+}
+
+#ifdef __SSE4_1__
+// Broadcasts 16-bit lane K of `src` into all eight 16-bit lanes of the result.
+//
+// Template parameter:
+//   K   - lane to replicate; must be 0..7, which the static_assert enforces.
+// Parameter:
+//   src - vector treated as eight 16-bit lanes.
+// Returns a vector whose every 16-bit lane equals lane K of src.
+// Example: Widen<5>( { s0, ..., s7 } ) yields { s5, s5, s5, s5, s5, s5, s5, s5 }.
+template<int K>
+static etcpak_force_inline __m128i Widen( const __m128i src )
+{
+    static_assert( K >= 0 && K <= 7, "Index out of range" );
+
+    // Position of lane K inside its 64-bit half, repeated in all four 2-bit fields of the
+    // 16-bit shuffle immediate so that the whole half gets filled with that lane.
+    constexpr int inHalf = K & 3;
+    constexpr int fillHalf = _MM_SHUFFLE( inHalf, inHalf, inHalf, inHalf );
+    // 32-bit shuffle immediates that replicate element 0 (low half) or element 2 (high half).
+    constexpr int spreadLow = _MM_SHUFFLE( 0, 0, 0, 0 );
+    constexpr int spreadHigh = _MM_SHUFFLE( 2, 2, 2, 2 );
+
+    if( K < 4 )
+    {
+        // Lane K sits in the low half: fill that half, then copy its first dword everywhere.
+        const __m128i lowFilled = _mm_shufflelo_epi16( src, fillHalf );
+        return _mm_shuffle_epi32( lowFilled, spreadLow );
+    }
+    else
+    {
+        // Lane K sits in the high half: fill that half, then copy its first dword everywhere.
+        const __m128i highFilled = _mm_shufflehi_epi16( src, fillHalf );
+        return _mm_shuffle_epi32( highFilled, spreadHigh );
+    }
+}
+
+// Maps the selector `sel` (the index used for g_alpha_SIMD in ProcessAlpha_ETC2) to the lane of
+// the multiplier vector that belongs to it: 0 -> 0, 1..3 -> 1, 4 -> 2, 5..7 -> 3, 8..13 -> 4, 14..15 -> 5.
+//
+// sel    - selector; the caller visible in this file passes 0..15.
+// return - lane index 0..5.
+//
+// Selectors outside 0..15 get a defined result matching the NEON GetMulSel (negative -> 0,
+// above 15 -> 5) instead of running off the end of a value-returning function.
+static etcpak_force_inline int GetMulSel( int sel )
+{
+    switch( sel )
+    {
+    case 0:
+        return 0;
+    case 1: case 2: case 3:
+        return 1;
+    case 4:
+        return 2;
+    case 5: case 6: case 7:
+        return 3;
+    case 8: case 9: case 10: case 11: case 12: case 13:
+        return 4;
+    case 14: case 15:
+        return 5;
+    default:
+        return ( sel < 0 ) ? 0 : 5;
+    }
+}
+
+#endif
+
+#ifdef __ARM_NEON
+
+static constexpr etcpak_force_inline int GetMulSel(int sel) // selector -> multiplier lane: <=0 | 1..3 | 4 | 5..7 | 8..13 | 14+
+{
+    return ( sel <= 0 ) ? 0 : ( ( sel <= 3 ) ? 1 : ( ( sel == 4 ) ? 2 : ( ( sel <= 7 ) ? 3 : ( ( sel <= 13 ) ? 4 : 5 ) ) ) );
+}
+
+static constexpr int ClampConstant( int x, int min, int max ) // constexpr clamp; the lower bound is tested first
+{
+    return ( x < min ) ? min : ( ( max < x ) ? max : x );
+}
+
+// Squared difference between element `Index` (0..15) of alphaBlock and each of the eight values in recVal.
+template <int Index>
+etcpak_force_inline static uint16x8_t ErrorProbe_EAC_NEON( uint8x8_t recVal, uint8x16_t alphaBlock )
+{
+    uint8x8_t srcValWide;
+#ifndef __aarch64__
+    if( Index < 8 ) // both arms are compiled for every Index, hence the clamp to the valid lanes 0..7
+        srcValWide = vdup_lane_u8( vget_low_u8( alphaBlock ), ClampConstant( Index, 0, 7 ) );
+    else
+        srcValWide = vdup_lane_u8( vget_high_u8( alphaBlock ), ClampConstant( Index - 8, 0, 7 ) );
+#else
+    srcValWide = vdup_laneq_u8( alphaBlock, Index );
+#endif
+    uint8x8_t deltaVal = vabd_u8( srcValWide, recVal ); // |source - candidate| per lane
+    return vmull_u8( deltaVal, deltaVal ); // widened to 16 bits while squaring
+}
+
+etcpak_force_inline static uint16_t MinError_EAC_NEON( uint16x8_t errProbe ) // smallest of the eight 16-bit lanes
+{
+#ifdef __aarch64__
+    return vminvq_u16( errProbe );
+#else
+    const uint16x4_t min4 = vpmin_u16( vget_low_u16( errProbe ), vget_high_u16( errProbe ) ); // 8 -> 4 candidates
+    const uint16x4_t min2 = vpmin_u16( min4, min4 ); // 4 -> 2 candidates
+    return vget_lane_u16( vpmin_u16( min2, min2 ), 0 ); // 2 -> 1, read from lane 0
+#endif
+}
+
+// For element `Index` of alphaBlock, finds the first of the eight recVal candidates with the
+// smallest squared error and returns its number (0..7) shifted left by 45 - 3 * Index.
+template <int Index>
+etcpak_force_inline static uint64_t MinErrorIndex_EAC_NEON( uint8x8_t recVal, uint8x16_t alphaBlock )
+{
+    const uint16x8_t sqErr = ErrorProbe_EAC_NEON<Index>( recVal, alphaBlock );
+    const uint16x8_t isBest = vceqq_u16( sqErr, vdupq_n_u16( MinError_EAC_NEON( sqErr ) ) );
+    // Saturating narrow leaves one 0xFF/0x00 byte per lane, so the trailing-zero count / 8 is the first matching lane.
+    const uint64_t packed = vget_lane_u64( vreinterpret_u64_u8( vqmovn_u16( isBest ) ), 0 );
+    return ( uint64_t( __builtin_ctzll( packed ) ) >> 3 ) << ( 45 - Index * 3 );
+}
+
+template <int Index>
+etcpak_force_inline static int16x8_t WidenMultiplier_EAC_NEON( int16x8_t multipliers )
+{
+    constexpr int Lane = GetMulSel( Index ); // lane of `multipliers` that is broadcast to all eight result lanes
+#ifndef __aarch64__
+    if( Lane < 4 ) // both arms are compiled for every Lane, hence the clamp to the valid lanes 0..3
+        return vdupq_lane_s16( vget_low_s16( multipliers ), ClampConstant( Lane, 0, 3 ) );
+    else
+        return vdupq_lane_s16( vget_high_s16( multipliers ), ClampConstant( Lane - 4, 0, 3 ) );
+#else
+    return vdupq_laneq_s16( multipliers, Lane );
+#endif
+}
+
+#endif
+
+static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
+{
+#if defined __SSE4_1__
+    // Check solid
+    __m128i s = _mm_loadu_si128( (__m128i*)src );
+    __m128i solidCmp = _mm_set1_epi8( src[0] );
+    __m128i cmpRes = _mm_cmpeq_epi8( s, solidCmp );
+    if( _mm_testc_si128( cmpRes, _mm_set1_epi32( -1 ) ) )
+    {
+        return src[0];
+    }
+
+    // Calculate min, max
+    __m128i s1 = _mm_shuffle_epi32( s, _MM_SHUFFLE( 2, 3, 0, 1 ) );
+    __m128i max1 = _mm_max_epu8( s, s1 );
+    __m128i min1 = _mm_min_epu8( s, s1 );
+    __m128i smax2 = _mm_shuffle_epi32( max1, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i smin2 = _mm_shuffle_epi32( min1, _MM_SHUFFLE( 0, 0, 2, 2 ) );
+    __m128i max2 = _mm_max_epu8( max1, smax2 );
+    __m128i min2 = _mm_min_epu8( min1, smin2 );
+    __m128i smax3 = _mm_alignr_epi8( max2, max2, 2 );
+    __m128i smin3 = _mm_alignr_epi8( min2, min2, 2 );
+    __m128i max3 = _mm_max_epu8( max2, smax3 );
+    __m128i min3 = _mm_min_epu8( min2, smin3 );
+    __m128i smax4 = _mm_alignr_epi8( max3, max3, 1 );
+    __m128i smin4 = _mm_alignr_epi8( min3, min3, 1 );
+    __m128i max = _mm_max_epu8( max3, smax4 );
+    __m128i min = _mm_min_epu8( min3, smin4 );
+    __m128i max16 = _mm_unpacklo_epi8( max, _mm_setzero_si128() );
+    __m128i min16 = _mm_unpacklo_epi8( min, _mm_setzero_si128() );
+
+    // src range, mid
+    __m128i srcRange = _mm_sub_epi16( max16, min16 );
+    __m128i srcRangeHalf = _mm_srli_epi16( srcRange, 1 );
+    __m128i srcMid = _mm_add_epi16( min16, srcRangeHalf );
+
+    // multiplier
+    __m128i mul1 = _mm_mulhi_epi16( srcRange, g_alphaRange_SIMD );
+    __m128i mul = _mm_add_epi16( mul1, _mm_set1_epi16( 1 ) );
+
+    // wide source
+    __m128i s16_1 = _mm_shuffle_epi32( s, _MM_SHUFFLE( 3, 2, 3, 2 ) );
+    __m128i s16[2] = { _mm_unpacklo_epi8( s, _mm_setzero_si128() ), _mm_unpacklo_epi8( s16_1, _mm_setzero_si128() ) };
+
+    __m128i sr[16] = {
+        Widen<0>( s16[0] ),
+        Widen<1>( s16[0] ),
+        Widen<2>( s16[0] ),
+        Widen<3>( s16[0] ),
+        Widen<4>( s16[0] ),
+        Widen<5>( s16[0] ),
+        Widen<6>( s16[0] ),
+        Widen<7>( s16[0] ),
+        Widen<0>( s16[1] ),
+        Widen<1>( s16[1] ),
+        Widen<2>( s16[1] ),
+        Widen<3>( s16[1] ),
+        Widen<4>( s16[1] ),
+        Widen<5>( s16[1] ),
+        Widen<6>( s16[1] ),
+        Widen<7>( s16[1] )
+    };
+
+#ifdef __AVX2__
+    __m256i srcRangeWide = _mm256_broadcastsi128_si256( srcRange );
+    __m256i srcMidWide = _mm256_broadcastsi128_si256( srcMid );
+
+    __m256i mulWide1 = _mm256_mulhi_epi16( srcRangeWide, g_alphaRange_AVX );
+    __m256i mulWide = _mm256_add_epi16( mulWide1, _mm256_set1_epi16( 1 ) );
+
+    __m256i modMul[8] = {
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[0] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[0] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[1] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[1] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[2] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[2] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[3] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[3] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[4] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[4] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[5] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[5] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[6] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[6] ) ) ), _mm256_setzero_si256() ),
+        _mm256_unpacklo_epi8( _mm256_packus_epi16( _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[7] ) ), _mm256_add_epi16( srcMidWide, _mm256_mullo_epi16( mulWide, g_alpha_AVX[7] ) ) ), _mm256_setzero_si256() ),
+    };
+
+    // find selector
+    __m256i mulErr = _mm256_setzero_si256();
+    for( int j=0; j<16; j++ )
+    {
+        __m256i s16Wide = _mm256_broadcastsi128_si256( sr[j] );
+        __m256i err1, err2;
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[0] );
+        __m256i localErr = _mm256_mullo_epi16( err1, err1 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[1] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[2] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[3] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[4] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[5] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[6] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        err1 = _mm256_sub_epi16( s16Wide, modMul[7] );
+        err2 = _mm256_mullo_epi16( err1, err1 );
+        localErr = _mm256_min_epu16( localErr, err2 );
+
+        // note that this can overflow, but since we're looking for the smallest error, it shouldn't matter
+        mulErr = _mm256_adds_epu16( mulErr, localErr );
+    }
+    uint64_t minPos1 = _mm_cvtsi128_si64( _mm_minpos_epu16( _mm256_castsi256_si128( mulErr ) ) );
+    uint64_t minPos2 = _mm_cvtsi128_si64( _mm_minpos_epu16( _mm256_extracti128_si256( mulErr, 1 ) ) );
+    int sel = ( ( minPos1 & 0xFFFF ) < ( minPos2 & 0xFFFF ) ) ? ( minPos1 >> 16 ) : ( 8 + ( minPos2 >> 16 ) );
+
+    __m128i recVal16;
+    switch( sel )
+    {
+    case 0:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<0>( mul ), g_alpha_SIMD[0] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<0>( mul ), g_alpha_SIMD[0] ) ) ), _mm_setzero_si128() );
+        break;
+    case 1:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[1] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[1] ) ) ), _mm_setzero_si128() );
+        break;
+    case 2:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[2] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[2] ) ) ), _mm_setzero_si128() );
+        break;
+    case 3:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[3] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[3] ) ) ), _mm_setzero_si128() );
+        break;
+    case 4:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<2>( mul ), g_alpha_SIMD[4] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<2>( mul ), g_alpha_SIMD[4] ) ) ), _mm_setzero_si128() );
+        break;
+    case 5:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[5] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[5] ) ) ), _mm_setzero_si128() );
+        break;
+    case 6:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[6] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[6] ) ) ), _mm_setzero_si128() );
+        break;
+    case 7:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[7] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[7] ) ) ), _mm_setzero_si128() );
+        break;
+    case 8:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[8] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[8] ) ) ), _mm_setzero_si128() );
+        break;
+    case 9:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[9] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[9] ) ) ), _mm_setzero_si128() );
+        break;
+    case 10:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[10] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[10] ) ) ), _mm_setzero_si128() );
+        break;
+    case 11:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[11] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[11] ) ) ), _mm_setzero_si128() );
+        break;
+    case 12:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[12] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[12] ) ) ), _mm_setzero_si128() );
+        break;
+    case 13:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[13] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[13] ) ) ), _mm_setzero_si128() );
+        break;
+    case 14:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[14] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[14] ) ) ), _mm_setzero_si128() );
+        break;
+    case 15:
+        recVal16 = _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[15] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[15] ) ) ), _mm_setzero_si128() );
+        break;
+    default:
+        assert( false );
+        break;
+    }
+#else
+    // wide multiplier
+    __m128i rangeMul[16] = {
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<0>( mul ), g_alpha_SIMD[0] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<0>( mul ), g_alpha_SIMD[0] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[1] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[1] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[2] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[2] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[3] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<1>( mul ), g_alpha_SIMD[3] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<2>( mul ), g_alpha_SIMD[4] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<2>( mul ), g_alpha_SIMD[4] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[5] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[5] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[6] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[6] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[7] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<3>( mul ), g_alpha_SIMD[7] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[8] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[8] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[9] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[9] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[10] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[10] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[11] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[11] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[12] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[12] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[13] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<4>( mul ), g_alpha_SIMD[13] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[14] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[14] ) ) ), _mm_setzero_si128() ),
+        _mm_unpacklo_epi8( _mm_packus_epi16( _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[15] ) ), _mm_add_epi16( srcMid, _mm_mullo_epi16( Widen<5>( mul ), g_alpha_SIMD[15] ) ) ), _mm_setzero_si128() )
+    };
+
+    // find selector
+    int err = std::numeric_limits<int>::max();
+    int sel;
+    for( int r=0; r<16; r++ )
+    {
+        __m128i err1, err2, minerr;
+        __m128i recVal16 = rangeMul[r];
+        int rangeErr;
+
+        err1 = _mm_sub_epi16( sr[0], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr = _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[1], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[2], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[3], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[4], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[5], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[6], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[7], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[8], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[9], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[10], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[11], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[12], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[13], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[14], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        err1 = _mm_sub_epi16( sr[15], recVal16 );
+        err2 = _mm_mullo_epi16( err1, err1 );
+        minerr = _mm_minpos_epu16( err2 );
+        rangeErr += _mm_cvtsi128_si64( minerr ) & 0xFFFF;
+
+        if( rangeErr < err )
+        {
+            err = rangeErr;
+            sel = r;
+            if( err == 0 ) break;
+        }
+    }
+
+    __m128i recVal16 = rangeMul[sel];
+#endif
+
+    // find indices
+    __m128i err1, err2, minerr;
+    uint64_t idx = 0, tmp;
+
+    err1 = _mm_sub_epi16( sr[0], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 15*3;
+
+    err1 = _mm_sub_epi16( sr[1], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 14*3;
+
+    err1 = _mm_sub_epi16( sr[2], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 13*3;
+
+    err1 = _mm_sub_epi16( sr[3], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 12*3;
+
+    err1 = _mm_sub_epi16( sr[4], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 11*3;
+
+    err1 = _mm_sub_epi16( sr[5], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 10*3;
+
+    err1 = _mm_sub_epi16( sr[6], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 9*3;
+
+    err1 = _mm_sub_epi16( sr[7], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 8*3;
+
+    err1 = _mm_sub_epi16( sr[8], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 7*3;
+
+    err1 = _mm_sub_epi16( sr[9], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 6*3;
+
+    err1 = _mm_sub_epi16( sr[10], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 5*3;
+
+    err1 = _mm_sub_epi16( sr[11], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 4*3;
+
+    err1 = _mm_sub_epi16( sr[12], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 3*3;
+
+    err1 = _mm_sub_epi16( sr[13], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 2*3;
+
+    err1 = _mm_sub_epi16( sr[14], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 1*3;
+
+    err1 = _mm_sub_epi16( sr[15], recVal16 );
+    err2 = _mm_mullo_epi16( err1, err1 );
+    minerr = _mm_minpos_epu16( err2 );
+    tmp = _mm_cvtsi128_si64( minerr );
+    idx |= ( tmp >> 16 ) << 0*3;
+
+    uint16_t rm[8];
+    _mm_storeu_si128( (__m128i*)rm, mul );
+    uint16_t sm = _mm_cvtsi128_si64( srcMid );
+
+    uint64_t d = ( uint64_t( sm ) << 56 ) |
+        ( uint64_t( rm[GetMulSel( sel )] ) << 52 ) |
+        ( uint64_t( sel ) << 48 ) |
+        idx;
+
+    return _bswap64( d );
+#elif defined __ARM_NEON
+
+    int16x8_t srcMidWide, multipliers;
+    int srcMid;
+    uint8x16_t srcAlphaBlock = vld1q_u8( src );
+    {
+        uint8_t ref = src[0];
+        uint8x16_t a0 = vdupq_n_u8( ref );
+        uint8x16_t r = vceqq_u8( srcAlphaBlock, a0 );
+        int64x2_t m = vreinterpretq_s64_u8( r );
+        if( m[0] == -1 && m[1] == -1 )
+            return ref;
+
+        // srcRange
+#ifdef __aarch64__
+        uint8_t min = vminvq_u8( srcAlphaBlock );
+        uint8_t max = vmaxvq_u8( srcAlphaBlock );
+        uint8_t srcRange = max - min;
+        multipliers = vqaddq_s16( vshrq_n_s16( vqdmulhq_n_s16( g_alphaRange_NEON, srcRange ), 1 ), vdupq_n_s16( 1 ) );
+        srcMid = min + srcRange / 2;
+        srcMidWide = vdupq_n_s16( srcMid );
+#else
+        uint8x8_t vmin = vpmin_u8( vget_low_u8( srcAlphaBlock ), vget_high_u8( srcAlphaBlock ) );
+        vmin = vpmin_u8( vmin, vmin );
+        vmin = vpmin_u8( vmin, vmin );
+        vmin = vpmin_u8( vmin, vmin );
+        uint8x8_t vmax = vpmax_u8( vget_low_u8( srcAlphaBlock ), vget_high_u8( srcAlphaBlock ) );
+        vmax = vpmax_u8( vmax, vmax );
+        vmax = vpmax_u8( vmax, vmax );
+        vmax = vpmax_u8( vmax, vmax );
+
+        int16x8_t srcRangeWide = vreinterpretq_s16_u16( vsubl_u8( vmax, vmin ) );
+        multipliers = vqaddq_s16( vshrq_n_s16( vqdmulhq_s16( g_alphaRange_NEON, srcRangeWide ), 1 ), vdupq_n_s16( 1 ) );
+        srcMidWide = vsraq_n_s16( vreinterpretq_s16_u16(vmovl_u8(vmin)), srcRangeWide, 1);
+        srcMid = vgetq_lane_s16( srcMidWide, 0 );
+#endif
+    }
+
+    // calculate reconstructed values
+#define EAC_APPLY_16X( m ) m( 0 ) m( 1 ) m( 2 ) m( 3 ) m( 4 ) m( 5 ) m( 6 ) m( 7 ) m( 8 ) m( 9 ) m( 10 ) m( 11 ) m( 12 ) m( 13 ) m( 14 ) m( 15 )
+
+#define EAC_RECONSTRUCT_VALUE( n ) vqmovun_s16( vmlaq_s16( srcMidWide, g_alpha_NEON[n], WidenMultiplier_EAC_NEON<n>( multipliers ) ) ),
+    uint8x8_t recVals[16] = { EAC_APPLY_16X( EAC_RECONSTRUCT_VALUE ) };
+
+    // find selector
+    int err = std::numeric_limits<int>::max();
+    int sel = 0;
+    for( int r = 0; r < 16; r++ )
+    {
+        uint8x8_t recVal = recVals[r];
+
+        int rangeErr = 0;
+#define EAC_ACCUMULATE_ERROR( n ) rangeErr += MinError_EAC_NEON( ErrorProbe_EAC_NEON<n>( recVal, srcAlphaBlock ) );
+        EAC_APPLY_16X( EAC_ACCUMULATE_ERROR )
+
+        if( rangeErr < err )
+        {
+            err = rangeErr;
+            sel = r;
+            if ( err == 0 ) break;
+        }
+    }
+
+    // combine results
+    uint64_t d = ( uint64_t( srcMid ) << 56 ) |
+        ( uint64_t( multipliers[GetMulSel( sel )] ) << 52 ) |
+        ( uint64_t( sel ) << 48);
+
+    // generate indices
+    uint8x8_t recVal = recVals[sel];
+#define EAC_INSERT_INDEX(n) d |= MinErrorIndex_EAC_NEON<n>( recVal, srcAlphaBlock );
+    EAC_APPLY_16X( EAC_INSERT_INDEX )
+
+    return _bswap64( d );
+
+#undef EAC_APPLY_16X
+#undef EAC_INSERT_INDEX
+#undef EAC_ACCUMULATE_ERROR
+#undef EAC_RECONSTRUCT_VALUE
+
+#else
+    {
+        bool solid = true;
+        const uint8_t* ptr = src + 1;
+        const uint8_t ref = *src;
+        for( int i=1; i<16; i++ )
+        {
+            if( ref != *ptr++ )
+            {
+                solid = false;
+                break;
+            }
+        }
+        if( solid )
+        {
+            return ref;
+        }
+    }
+
+    uint8_t min = src[0];
+    uint8_t max = src[0];
+    for( int i=1; i<16; i++ )
+    {
+        if( min > src[i] ) min = src[i];
+        else if( max < src[i] ) max = src[i];
+    }
+    int srcRange = max - min;
+    int srcMid = min + srcRange / 2;
+
+    uint8_t buf[16][16];
+    int err = std::numeric_limits<int>::max();
+    int sel;
+    int selmul;
+    for( int r=0; r<16; r++ )
+    {
+        int mul = ( ( srcRange * g_alphaRange[r] ) >> 16 ) + 1;
+
+        int rangeErr = 0;
+        for( int i=0; i<16; i++ )
+        {
+            const auto srcVal = src[i];
+
+            int idx = 0;
+            const auto modVal = g_alpha[r][0] * mul;
+            const auto recVal = clampu8( srcMid + modVal );
+            int localErr = sq( srcVal - recVal );
+
+            if( localErr != 0 )
+            {
+                for( int j=1; j<8; j++ )
+                {
+                    const auto modVal = g_alpha[r][j] * mul;
+                    const auto recVal = clampu8( srcMid + modVal );
+                    const auto errProbe = sq( srcVal - recVal );
+                    if( errProbe < localErr )
+                    {
+                        localErr = errProbe;
+                        idx = j;
+                    }
+                }
+            }
+
+            buf[r][i] = idx;
+            rangeErr += localErr;
+        }
+
+        if( rangeErr < err )
+        {
+            err = rangeErr;
+            sel = r;
+            selmul = mul;
+            if( err == 0 ) break;
+        }
+    }
+
+    uint64_t d = ( uint64_t( srcMid ) << 56 ) |
+        ( uint64_t( selmul ) << 52 ) |
+        ( uint64_t( sel ) << 48 );
+
+    int offset = 45;
+    auto ptr = buf[sel];
+    for( int i=0; i<16; i++ )
+    {
+        d |= uint64_t( *ptr++ ) << offset;
+        offset -= 3;
+    }
+
+    return _bswap64( d );
+#endif
+}
+
+
+void CompressEtc1Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    // Per 4x4 tile (`blocks` of them, image rows `width` pixels long): spread each pixel's top byte over its colour bytes and store ProcessRGB()'s 64-bit result in dst.
+    size_t w = 0;                       // tiles finished in the current row of tiles (unsigned, like width/4)
+    alignas( 16 ) uint32_t buf[4*4];    // one tile, column by column; _mm_store_si128 below needs 16-byte alignment
+    for( ; blocks != 0; blocks-- )      // tested up front, so blocks == 0 does nothing (the old do/while wrapped around)
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+        // Rows -> columns, so buf ends up in the same order as the scalar path below.
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        __m128i c0 = _mm_castps_si128( px0 );
+        __m128i c1 = _mm_castps_si128( px1 );
+        __m128i c2 = _mm_castps_si128( px2 );
+        __m128i c3 = _mm_castps_si128( px3 );
+        // Shuffle control: byte 3 of every 32-bit pixel is broadcast to all four bytes of that pixel.
+        __m128i mask = _mm_setr_epi32( 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f );
+        __m128i p0 = _mm_shuffle_epi8( c0, mask );
+        __m128i p1 = _mm_shuffle_epi8( c1, mask );
+        __m128i p2 = _mm_shuffle_epi8( c2, mask );
+        __m128i p3 = _mm_shuffle_epi8( c3, mask );
+
+        _mm_store_si128( (__m128i*)(buf + 0),  p0 );
+        _mm_store_si128( (__m128i*)(buf + 4),  p1 );
+        _mm_store_si128( (__m128i*)(buf + 8),  p2 );
+        _mm_store_si128( (__m128i*)(buf + 12), p3 );
+
+        src += 4;
+#else    // scalar gather, one column per iteration; leaves the top byte of every buf entry 0, unlike the shuffle above
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            unsigned int a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src -= width * 3 - 1;    // back to the top row, one column to the right
+        }
+#endif
+        if( ++w == width/4 )         // a whole row of tiles is done
+        {
+            src += width * 3;        // skip the three image rows that row of tiles also covered
+            w = 0;
+        }
+        *dst++ = ProcessRGB( (uint8_t*)buf );
+    }
+}
+
+void CompressEtc2Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    // Per 4x4 tile (`blocks` of them, image rows `width` pixels long): spread each pixel's top byte over its colour bytes and store ProcessRGB_ETC2()'s 64-bit result in dst.
+    size_t w = 0;                       // tiles finished in the current row of tiles (unsigned, like width/4)
+    alignas( 16 ) uint32_t buf[4*4];    // one tile, column by column; _mm_store_si128 below needs 16-byte alignment
+    for( ; blocks != 0; blocks-- )      // tested up front, so blocks == 0 does nothing (the old do/while wrapped around)
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+        // Rows -> columns, so buf ends up in the same order as the scalar path below.
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        __m128i c0 = _mm_castps_si128( px0 );
+        __m128i c1 = _mm_castps_si128( px1 );
+        __m128i c2 = _mm_castps_si128( px2 );
+        __m128i c3 = _mm_castps_si128( px3 );
+        // Shuffle control: byte 3 of every 32-bit pixel is broadcast to all four bytes of that pixel.
+        __m128i mask = _mm_setr_epi32( 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f );
+        __m128i p0 = _mm_shuffle_epi8( c0, mask );
+        __m128i p1 = _mm_shuffle_epi8( c1, mask );
+        __m128i p2 = _mm_shuffle_epi8( c2, mask );
+        __m128i p3 = _mm_shuffle_epi8( c3, mask );
+
+        _mm_store_si128( (__m128i*)(buf + 0),  p0 );
+        _mm_store_si128( (__m128i*)(buf + 4),  p1 );
+        _mm_store_si128( (__m128i*)(buf + 8),  p2 );
+        _mm_store_si128( (__m128i*)(buf + 12), p3 );
+
+        src += 4;
+#else    // scalar gather, one column per iteration; leaves the top byte of every buf entry 0, unlike the shuffle above
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            unsigned int a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src += width;
+            a = *src >> 24;
+            *ptr++ = a | ( a << 8 ) | ( a << 16 );
+            src -= width * 3 - 1;    // back to the top row, one column to the right
+        }
+#endif
+        if( ++w == width/4 )         // a whole row of tiles is done
+        {
+            src += width * 3;        // skip the three image rows that row of tiles also covered
+            w = 0;
+        }
+        *dst++ = ProcessRGB_ETC2( (uint8_t*)buf );
+    }
+}
+
+#include <chrono>
+#include <thread>
+
+void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    // Per 4x4 tile (`blocks` of them, image rows `width` pixels long): gather the 16 pixels and store ProcessRGB()'s 64-bit result in dst.
+    size_t w = 0;                       // tiles finished in the current row of tiles (unsigned, like width/4)
+    alignas( 16 ) uint32_t buf[4*4];    // one tile, column by column; _mm_store_si128 below needs 16-byte alignment
+    for( ; blocks != 0; blocks-- )      // tested up front, so blocks == 0 does nothing (the old do/while wrapped around)
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+        // Rows -> columns, so buf ends up in the same order as the scalar path below.
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        _mm_store_si128( (__m128i*)(buf + 0),  _mm_castps_si128( px0 ) );
+        _mm_store_si128( (__m128i*)(buf + 4),  _mm_castps_si128( px1 ) );
+        _mm_store_si128( (__m128i*)(buf + 8),  _mm_castps_si128( px2 ) );
+        _mm_store_si128( (__m128i*)(buf + 12), _mm_castps_si128( px3 ) );
+
+        src += 4;
+#else    // scalar gather, one column per iteration
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src -= width * 3 - 1;    // back to the top row, one column to the right
+        }
+#endif
+        if( ++w == width/4 )         // a whole row of tiles is done
+        {
+            src += width * 3;        // skip the three image rows that row of tiles also covered
+            w = 0;
+        }
+        *dst++ = ProcessRGB( (uint8_t*)buf );
+    }
+}
+
+void CompressEtc1RgbDither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    // Per 4x4 tile (`blocks` of them, image rows `width` pixels long): gather the 16 pixels, dither them (SSE4.1/AVX2 builds) and store ProcessRGB()'s 64-bit result in dst.
+    size_t w = 0;                       // tiles finished in the current row of tiles (unsigned, like width/4)
+    alignas( 16 ) uint32_t buf[4*4];    // one tile, column by column; _mm_store_si128 below needs 16-byte alignment
+    for( ; blocks != 0; blocks-- )      // tested up front, so blocks == 0 does nothing (the old do/while wrapped around)
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+        // Rows -> columns, so buf ends up in the same order as the scalar path below.
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+#  ifdef __AVX2__
+        DitherAvx2( (uint8_t*)buf, _mm_castps_si128( px0 ), _mm_castps_si128( px1 ), _mm_castps_si128( px2 ), _mm_castps_si128( px3 ) );
+#  else
+        _mm_store_si128( (__m128i*)(buf + 0),  _mm_castps_si128( px0 ) );
+        _mm_store_si128( (__m128i*)(buf + 4),  _mm_castps_si128( px1 ) );
+        _mm_store_si128( (__m128i*)(buf + 8),  _mm_castps_si128( px2 ) );
+        _mm_store_si128( (__m128i*)(buf + 12), _mm_castps_si128( px3 ) );
+
+        Dither( (uint8_t*)buf );
+#  endif
+
+        src += 4;
+#else    // scalar gather, one column per iteration. NOTE(review): no dither call on this path - confirm that is intended
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src -= width * 3 - 1;    // back to the top row, one column to the right
+        }
+#endif
+        if( ++w == width/4 )         // a whole row of tiles is done
+        {
+            src += width * 3;        // skip the three image rows that row of tiles also covered
+            w = 0;
+        }
+        *dst++ = ProcessRGB( (uint8_t*)buf );
+    }
+}
+
+void CompressEtc2Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    // Per 4x4 tile (`blocks` of them, image rows `width` pixels long): gather the 16 pixels and store ProcessRGB_ETC2()'s 64-bit result in dst.
+    size_t w = 0;                       // tiles finished in the current row of tiles (unsigned, like width/4)
+    alignas( 16 ) uint32_t buf[4*4];    // one tile, column by column; _mm_store_si128 below needs 16-byte alignment
+    for( ; blocks != 0; blocks-- )      // tested up front, so blocks == 0 does nothing (the old do/while wrapped around)
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+        // Rows -> columns, so buf ends up in the same order as the scalar path below.
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        _mm_store_si128( (__m128i*)(buf + 0),  _mm_castps_si128( px0 ) );
+        _mm_store_si128( (__m128i*)(buf + 4),  _mm_castps_si128( px1 ) );
+        _mm_store_si128( (__m128i*)(buf + 8),  _mm_castps_si128( px2 ) );
+        _mm_store_si128( (__m128i*)(buf + 12), _mm_castps_si128( px3 ) );
+
+        src += 4;
+#else    // scalar gather, one column per iteration
+        auto ptr = buf;
+        for( int x=0; x<4; x++ )
+        {
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src += width;
+            *ptr++ = *src;
+            src -= width * 3 - 1;    // back to the top row, one column to the right
+        }
+#endif
+        if( ++w == width/4 )         // a whole row of tiles is done
+        {
+            src += width * 3;        // skip the three image rows that row of tiles also covered
+            w = 0;
+        }
+        *dst++ = ProcessRGB_ETC2( (uint8_t*)buf );
+    }
+}
+
+void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width )
+{
+    // Per 4x4 tile (`blocks` of them, image rows `width` pixels long): write two 64-bit words to dst, ProcessAlpha_ETC2() of the top bytes first, then ProcessRGB_ETC2() of the pixels.
+    size_t w = 0;                        // tiles finished in the current row of tiles (unsigned, like width/4)
+    alignas( 16 ) uint32_t rgba[4*4];    // one tile of whole pixels, column by column; aligned for _mm_store_si128
+    alignas( 16 ) uint8_t alpha[4*4];    // top byte of each of those pixels, same order; aligned for _mm_store_si128
+    for( ; blocks != 0; blocks-- )       // tested up front, so blocks == 0 does nothing (the old do/while wrapped around)
+    {
+#ifdef __SSE4_1__
+        __m128 px0 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 0 ) ) );
+        __m128 px1 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 1 ) ) );
+        __m128 px2 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 2 ) ) );
+        __m128 px3 = _mm_castsi128_ps( _mm_loadu_si128( (__m128i*)( src + width * 3 ) ) );
+        // Rows -> columns, so both buffers end up in the same order as the scalar path below.
+        _MM_TRANSPOSE4_PS( px0, px1, px2, px3 );
+
+        __m128i c0 = _mm_castps_si128( px0 );
+        __m128i c1 = _mm_castps_si128( px1 );
+        __m128i c2 = _mm_castps_si128( px2 );
+        __m128i c3 = _mm_castps_si128( px3 );
+
+        _mm_store_si128( (__m128i*)(rgba + 0),  c0 );
+        _mm_store_si128( (__m128i*)(rgba + 4),  c1 );
+        _mm_store_si128( (__m128i*)(rgba + 8),  c2 );
+        _mm_store_si128( (__m128i*)(rgba + 12), c3 );
+        // Shuffle control: lane 0 picks bytes 3, 7, 11, 15 (the four top bytes); the -1 lanes produce zeros.
+        __m128i mask = _mm_setr_epi32( 0x0f0b0703, -1, -1, -1 );
+        // Move the selecting lane to position 0..3 for column 0..3, so each aN is zero outside its own lane.
+        __m128i a0 = _mm_shuffle_epi8( c0, mask );
+        __m128i a1 = _mm_shuffle_epi8( c1, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 3, 0, 3 ) ) );
+        __m128i a2 = _mm_shuffle_epi8( c2, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 3, 0, 3, 3 ) ) );
+        __m128i a3 = _mm_shuffle_epi8( c3, _mm_shuffle_epi32( mask, _MM_SHUFFLE( 0, 3, 3, 3 ) ) );
+        // OR the four disjoint lanes together into the 16 packed top bytes.
+        __m128i s0 = _mm_or_si128( a0, a1 );
+        __m128i s1 = _mm_or_si128( a2, a3 );
+        __m128i s2 = _mm_or_si128( s0, s1 );
+
+        _mm_store_si128( (__m128i*)alpha, s2 );
+
+        src += 4;
+#else    // scalar gather, one column per iteration
+        auto ptr = rgba;
+        auto ptr8 = alpha;
+        for( int x=0; x<4; x++ )
+        {
+            auto v = *src;
+            *ptr++ = v;
+            *ptr8++ = v >> 24;
+            src += width;
+            v = *src;
+            *ptr++ = v;
+            *ptr8++ = v >> 24;
+            src += width;
+            v = *src;
+            *ptr++ = v;
+            *ptr8++ = v >> 24;
+            src += width;
+            v = *src;
+            *ptr++ = v;
+            *ptr8++ = v >> 24;
+            src -= width * 3 - 1;    // back to the top row, one column to the right
+        }
+#endif
+        if( ++w == width/4 )         // a whole row of tiles is done
+        {
+            src += width * 3;        // skip the three image rows that row of tiles also covered
+            w = 0;
+        }
+        *dst++ = ProcessAlpha_ETC2( alpha );
+        *dst++ = ProcessRGB_ETC2( (uint8_t*)rgba );
+    }
+}
diff --git a/thirdparty/etcpak/ProcessRGB.hpp b/thirdparty/etcpak/ProcessRGB.hpp
new file mode 100644
index 0000000000..c5555a5bb1
--- /dev/null
+++ b/thirdparty/etcpak/ProcessRGB.hpp
@@ -0,0 +1,13 @@
+#ifndef __PROCESSRGB_HPP__
+#define __PROCESSRGB_HPP__
+#include <stddef.h>    // size_t, used by every `width` parameter below (<stdint.h> alone does not guarantee it)
+#include <stdint.h>
+// Each function encodes `blocks` 4x4 pixel tiles read from src (image rows are `width` pixels long) into 64-bit words at dst.
+void CompressEtc1Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );        // top byte of each pixel, via ProcessRGB; one word per tile
+void CompressEtc2Alpha( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );        // top byte of each pixel, via ProcessRGB_ETC2; one word per tile
+void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );          // whole pixels, via ProcessRGB; one word per tile
+void CompressEtc1RgbDither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );    // as CompressEtc1Rgb, with dithering in the SSE4.1/AVX2 builds
+void CompressEtc2Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );          // whole pixels, via ProcessRGB_ETC2; one word per tile
+void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );         // two words per tile: ProcessAlpha_ETC2, then ProcessRGB_ETC2
+
+#endif
diff --git a/thirdparty/etcpak/Tables.cpp b/thirdparty/etcpak/Tables.cpp
new file mode 100644
index 0000000000..5c7fd9cf61
--- /dev/null
+++ b/thirdparty/etcpak/Tables.cpp
@@ -0,0 +1,221 @@
+#include "Tables.hpp"
+
+const int32_t g_table[8][4] = {    // 8 rows, each laid out as { a, b, -a, -b }. NOTE(review): presumably the ETC1 intensity modifiers - confirm against the users
+    {  2,  8,   -2,   -8 },
+    {  5, 17,   -5,  -17 },
+    {  9, 29,   -9,  -29 },
+    { 13, 42,  -13,  -42 },
+    { 18, 60,  -18,  -60 },
+    { 24, 80,  -24,  -80 },
+    { 33, 106, -33, -106 },
+    { 47, 183, -47, -183 }
+};
+
+const int64_t g_table256[8][4] = {    // the g_table entries, each multiplied by 256 and stored as 64-bit values
+    {  2*256,  8*256,   -2*256,   -8*256 },
+    {  5*256, 17*256,   -5*256,  -17*256 },
+    {  9*256, 29*256,   -9*256,  -29*256 },
+    { 13*256, 42*256,  -13*256,  -42*256 },
+    { 18*256, 60*256,  -18*256,  -60*256 },
+    { 24*256, 80*256,  -24*256,  -80*256 },
+    { 33*256, 106*256, -33*256, -106*256 },
+    { 47*256, 183*256, -47*256, -183*256 }
+};
+
+const uint32_t g_id[4][16] = {    // 4 rows of 16 ids; row n contains only the values 2n and 2n+1. NOTE(review): meaning of the ids is not visible here
+    { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+    { 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2 },
+    { 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 },
+    { 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6 }
+};
+
+const uint32_t g_avg2[16] = {    // entry i is i * 0x11, i.e. the 4-bit value i repeated in both nibbles of a byte
+    0x00,
+    0x11,
+    0x22,
+    0x33,
+    0x44,
+    0x55,
+    0x66,
+    0x77,
+    0x88,
+    0x99,
+    0xAA,
+    0xBB,
+    0xCC,
+    0xDD,
+    0xEE,
+    0xFF
+};
+
+const uint32_t g_flags[64] = {    // 64 constants; low half is 0x0402 or 0xE002. NOTE(review): the meaning of the bits is not visible in this file
+    0x80800402, 0x80800402, 0x80800402, 0x80800402,    // entries 0-15: high half 0x8080
+    0x80800402, 0x80800402, 0x80800402, 0x8080E002,
+    0x80800402, 0x80800402, 0x8080E002, 0x8080E002,
+    0x80800402, 0x8080E002, 0x8080E002, 0x8080E002,
+    0x80000402, 0x80000402, 0x80000402, 0x80000402,    // entries 16-31: high half 0x8000
+    0x80000402, 0x80000402, 0x80000402, 0x8000E002,
+    0x80000402, 0x80000402, 0x8000E002, 0x8000E002,
+    0x80000402, 0x8000E002, 0x8000E002, 0x8000E002,
+    0x00800402, 0x00800402, 0x00800402, 0x00800402,    // entries 32-47: high half 0x0080
+    0x00800402, 0x00800402, 0x00800402, 0x0080E002,
+    0x00800402, 0x00800402, 0x0080E002, 0x0080E002,
+    0x00800402, 0x0080E002, 0x0080E002, 0x0080E002,
+    0x00000402, 0x00000402, 0x00000402, 0x00000402,    // entries 48-63: high half 0x0000
+    0x00000402, 0x00000402, 0x00000402, 0x0000E002,
+    0x00000402, 0x00000402, 0x0000E002, 0x0000E002,
+    0x00000402, 0x0000E002, 0x0000E002, 0x0000E002
+};
+
+const int32_t g_alpha[16][8] = {    // 16 modifier rows of 8 entries; the alpha encoder reconstructs a value as clampu8( srcMid + g_alpha[r][j] * mul )
+    { -3, -6,  -9, -15, 2, 5, 8, 14 },    // in every row, column 3 is the most negative entry and column 7 the largest
+    { -3, -7, -10, -13, 2, 6, 9, 12 },
+    { -2, -5,  -8, -13, 1, 4, 7, 12 },
+    { -2, -4,  -6, -13, 1, 3, 5, 12 },
+    { -3, -6,  -8, -12, 2, 5, 7, 11 },
+    { -3, -7,  -9, -11, 2, 6, 8, 10 },
+    { -4, -7,  -8, -11, 3, 6, 7, 10 },
+    { -3, -5,  -8, -11, 2, 4, 7, 10 },
+    { -2, -6,  -8, -10, 1, 5, 7,  9 },
+    { -2, -5,  -8, -10, 1, 4, 7,  9 },
+    { -2, -4,  -8, -10, 1, 3, 7,  9 },
+    { -2, -5,  -7, -10, 1, 4, 6,  9 },
+    { -3, -4,  -7, -10, 2, 3, 6,  9 },
+    { -1, -2,  -3, -10, 0, 1, 2,  9 },
+    { -4, -6,  -8,  -9, 3, 5, 7,  8 },
+    { -3, -5,  -7,  -9, 2, 4, 6,  8 }
+};
+
+const int32_t g_alphaRange[16] = {    // entry r = 0x100FF / ( 1 + largest - smallest entry of g_alpha[r] ); the scalar encoder uses mul = ( ( srcRange * g_alphaRange[r] ) >> 16 ) + 1
+    0x100FF / ( 1 + g_alpha[0][7] - g_alpha[0][3] ),
+    0x100FF / ( 1 + g_alpha[1][7] - g_alpha[1][3] ),
+    0x100FF / ( 1 + g_alpha[2][7] - g_alpha[2][3] ),
+    0x100FF / ( 1 + g_alpha[3][7] - g_alpha[3][3] ),
+    0x100FF / ( 1 + g_alpha[4][7] - g_alpha[4][3] ),
+    0x100FF / ( 1 + g_alpha[5][7] - g_alpha[5][3] ),
+    0x100FF / ( 1 + g_alpha[6][7] - g_alpha[6][3] ),
+    0x100FF / ( 1 + g_alpha[7][7] - g_alpha[7][3] ),
+    0x100FF / ( 1 + g_alpha[8][7] - g_alpha[8][3] ),
+    0x100FF / ( 1 + g_alpha[9][7] - g_alpha[9][3] ),
+    0x100FF / ( 1 + g_alpha[10][7] - g_alpha[10][3] ),
+    0x100FF / ( 1 + g_alpha[11][7] - g_alpha[11][3] ),
+    0x100FF / ( 1 + g_alpha[12][7] - g_alpha[12][3] ),
+    0x100FF / ( 1 + g_alpha[13][7] - g_alpha[13][3] ),
+    0x100FF / ( 1 + g_alpha[14][7] - g_alpha[14][3] ),
+    0x100FF / ( 1 + g_alpha[15][7] - g_alpha[15][3] ),
+};
+
+#ifdef __SSE4_1__
+const __m128i g_table_SIMD[2] =    // [0] = column 0 of g_table for rows 0-7, [1] = column 1, as 16-bit lanes
+{
+    _mm_setr_epi16(   2,   5,   9,  13,  18,  24,  33,  47),
+    _mm_setr_epi16(   8,  17,  29,  42,  60,  80, 106, 183)
+};
+const __m128i g_table128_SIMD[2] =    // the same two columns, every entry multiplied by 128
+{
+    _mm_setr_epi16(   2*128,   5*128,   9*128,  13*128,  18*128,  24*128,  33*128,  47*128),
+    _mm_setr_epi16(   8*128,  17*128,  29*128,  42*128,  60*128,  80*128, 106*128, 183*128)
+};
+const __m128i g_table256_SIMD[4] =    // the same two columns times 256 as 32-bit lanes: column 0 rows 0-3, column 1 rows 0-3, column 0 rows 4-7, column 1 rows 4-7
+{
+    _mm_setr_epi32(  2*256,   5*256,   9*256,  13*256),
+    _mm_setr_epi32(  8*256,  17*256,  29*256,  42*256),
+    _mm_setr_epi32( 18*256,  24*256,  33*256,  47*256),
+    _mm_setr_epi32( 60*256,  80*256, 106*256, 183*256)
+};
+
+const __m128i g_alpha_SIMD[16] = {    // element r holds row r of g_alpha as eight 16-bit lanes
+    _mm_setr_epi16( g_alpha[ 0][0], g_alpha[ 0][1], g_alpha[ 0][2], g_alpha[ 0][3], g_alpha[ 0][4], g_alpha[ 0][5], g_alpha[ 0][6], g_alpha[ 0][7] ),
+    _mm_setr_epi16( g_alpha[ 1][0], g_alpha[ 1][1], g_alpha[ 1][2], g_alpha[ 1][3], g_alpha[ 1][4], g_alpha[ 1][5], g_alpha[ 1][6], g_alpha[ 1][7] ),
+    _mm_setr_epi16( g_alpha[ 2][0], g_alpha[ 2][1], g_alpha[ 2][2], g_alpha[ 2][3], g_alpha[ 2][4], g_alpha[ 2][5], g_alpha[ 2][6], g_alpha[ 2][7] ),
+    _mm_setr_epi16( g_alpha[ 3][0], g_alpha[ 3][1], g_alpha[ 3][2], g_alpha[ 3][3], g_alpha[ 3][4], g_alpha[ 3][5], g_alpha[ 3][6], g_alpha[ 3][7] ),
+    _mm_setr_epi16( g_alpha[ 4][0], g_alpha[ 4][1], g_alpha[ 4][2], g_alpha[ 4][3], g_alpha[ 4][4], g_alpha[ 4][5], g_alpha[ 4][6], g_alpha[ 4][7] ),
+    _mm_setr_epi16( g_alpha[ 5][0], g_alpha[ 5][1], g_alpha[ 5][2], g_alpha[ 5][3], g_alpha[ 5][4], g_alpha[ 5][5], g_alpha[ 5][6], g_alpha[ 5][7] ),
+    _mm_setr_epi16( g_alpha[ 6][0], g_alpha[ 6][1], g_alpha[ 6][2], g_alpha[ 6][3], g_alpha[ 6][4], g_alpha[ 6][5], g_alpha[ 6][6], g_alpha[ 6][7] ),
+    _mm_setr_epi16( g_alpha[ 7][0], g_alpha[ 7][1], g_alpha[ 7][2], g_alpha[ 7][3], g_alpha[ 7][4], g_alpha[ 7][5], g_alpha[ 7][6], g_alpha[ 7][7] ),
+    _mm_setr_epi16( g_alpha[ 8][0], g_alpha[ 8][1], g_alpha[ 8][2], g_alpha[ 8][3], g_alpha[ 8][4], g_alpha[ 8][5], g_alpha[ 8][6], g_alpha[ 8][7] ),
+    _mm_setr_epi16( g_alpha[ 9][0], g_alpha[ 9][1], g_alpha[ 9][2], g_alpha[ 9][3], g_alpha[ 9][4], g_alpha[ 9][5], g_alpha[ 9][6], g_alpha[ 9][7] ),
+    _mm_setr_epi16( g_alpha[10][0], g_alpha[10][1], g_alpha[10][2], g_alpha[10][3], g_alpha[10][4], g_alpha[10][5], g_alpha[10][6], g_alpha[10][7] ),
+    _mm_setr_epi16( g_alpha[11][0], g_alpha[11][1], g_alpha[11][2], g_alpha[11][3], g_alpha[11][4], g_alpha[11][5], g_alpha[11][6], g_alpha[11][7] ),
+    _mm_setr_epi16( g_alpha[12][0], g_alpha[12][1], g_alpha[12][2], g_alpha[12][3], g_alpha[12][4], g_alpha[12][5], g_alpha[12][6], g_alpha[12][7] ),
+    _mm_setr_epi16( g_alpha[13][0], g_alpha[13][1], g_alpha[13][2], g_alpha[13][3], g_alpha[13][4], g_alpha[13][5], g_alpha[13][6], g_alpha[13][7] ),
+    _mm_setr_epi16( g_alpha[14][0], g_alpha[14][1], g_alpha[14][2], g_alpha[14][3], g_alpha[14][4], g_alpha[14][5], g_alpha[14][6], g_alpha[14][7] ),
+    _mm_setr_epi16( g_alpha[15][0], g_alpha[15][1], g_alpha[15][2], g_alpha[15][3], g_alpha[15][4], g_alpha[15][5], g_alpha[15][6], g_alpha[15][7] ),
+};
+
+const __m128i g_alphaRange_SIMD = _mm_setr_epi16(    // the six distinct g_alphaRange values (rows with the same span share one), then two zero lanes
+    g_alphaRange[0],
+    g_alphaRange[1],     // same value as rows 2 and 3
+    g_alphaRange[4],
+    g_alphaRange[5],     // same value as rows 6 and 7
+    g_alphaRange[8],     // same value as rows 9-13
+    g_alphaRange[14],    // same value as row 15
+    0,
+    0 );
+#endif
+
+#ifdef __AVX2__
+const __m256i g_alpha_AVX[8] = {    // g_alpha transposed: element j holds column j, with lane r taken from row r
+    _mm256_setr_epi16( g_alpha[ 0][0], g_alpha[ 1][0], g_alpha[ 2][0], g_alpha[ 3][0], g_alpha[ 4][0], g_alpha[ 5][0], g_alpha[ 6][0], g_alpha[ 7][0], g_alpha[ 8][0], g_alpha[ 9][0], g_alpha[10][0], g_alpha[11][0], g_alpha[12][0], g_alpha[13][0], g_alpha[14][0], g_alpha[15][0] ),
+    _mm256_setr_epi16( g_alpha[ 0][1], g_alpha[ 1][1], g_alpha[ 2][1], g_alpha[ 3][1], g_alpha[ 4][1], g_alpha[ 5][1], g_alpha[ 6][1], g_alpha[ 7][1], g_alpha[ 8][1], g_alpha[ 9][1], g_alpha[10][1], g_alpha[11][1], g_alpha[12][1], g_alpha[13][1], g_alpha[14][1], g_alpha[15][1] ),
+    _mm256_setr_epi16( g_alpha[ 0][2], g_alpha[ 1][2], g_alpha[ 2][2], g_alpha[ 3][2], g_alpha[ 4][2], g_alpha[ 5][2], g_alpha[ 6][2], g_alpha[ 7][2], g_alpha[ 8][2], g_alpha[ 9][2], g_alpha[10][2], g_alpha[11][2], g_alpha[12][2], g_alpha[13][2], g_alpha[14][2], g_alpha[15][2] ),
+    _mm256_setr_epi16( g_alpha[ 0][3], g_alpha[ 1][3], g_alpha[ 2][3], g_alpha[ 3][3], g_alpha[ 4][3], g_alpha[ 5][3], g_alpha[ 6][3], g_alpha[ 7][3], g_alpha[ 8][3], g_alpha[ 9][3], g_alpha[10][3], g_alpha[11][3], g_alpha[12][3], g_alpha[13][3], g_alpha[14][3], g_alpha[15][3] ),
+    _mm256_setr_epi16( g_alpha[ 0][4], g_alpha[ 1][4], g_alpha[ 2][4], g_alpha[ 3][4], g_alpha[ 4][4], g_alpha[ 5][4], g_alpha[ 6][4], g_alpha[ 7][4], g_alpha[ 8][4], g_alpha[ 9][4], g_alpha[10][4], g_alpha[11][4], g_alpha[12][4], g_alpha[13][4], g_alpha[14][4], g_alpha[15][4] ),
+    _mm256_setr_epi16( g_alpha[ 0][5], g_alpha[ 1][5], g_alpha[ 2][5], g_alpha[ 3][5], g_alpha[ 4][5], g_alpha[ 5][5], g_alpha[ 6][5], g_alpha[ 7][5], g_alpha[ 8][5], g_alpha[ 9][5], g_alpha[10][5], g_alpha[11][5], g_alpha[12][5], g_alpha[13][5], g_alpha[14][5], g_alpha[15][5] ),
+    _mm256_setr_epi16( g_alpha[ 0][6], g_alpha[ 1][6], g_alpha[ 2][6], g_alpha[ 3][6], g_alpha[ 4][6], g_alpha[ 5][6], g_alpha[ 6][6], g_alpha[ 7][6], g_alpha[ 8][6], g_alpha[ 9][6], g_alpha[10][6], g_alpha[11][6], g_alpha[12][6], g_alpha[13][6], g_alpha[14][6], g_alpha[15][6] ),
+    _mm256_setr_epi16( g_alpha[ 0][7], g_alpha[ 1][7], g_alpha[ 2][7], g_alpha[ 3][7], g_alpha[ 4][7], g_alpha[ 5][7], g_alpha[ 6][7], g_alpha[ 7][7], g_alpha[ 8][7], g_alpha[ 9][7], g_alpha[10][7], g_alpha[11][7], g_alpha[12][7], g_alpha[13][7], g_alpha[14][7], g_alpha[15][7] ),
+};
+
+const __m256i g_alphaRange_AVX = _mm256_setr_epi16(    // all 16 g_alphaRange entries, lane r = entry r
+    g_alphaRange[ 0], g_alphaRange[ 1], g_alphaRange[ 2], g_alphaRange[ 3], g_alphaRange[ 4], g_alphaRange[ 5], g_alphaRange[ 6], g_alphaRange[ 7],
+    g_alphaRange[ 8], g_alphaRange[ 9], g_alphaRange[10], g_alphaRange[11], g_alphaRange[12], g_alphaRange[13], g_alphaRange[14], g_alphaRange[15]
+);
+#endif
+
+#ifdef __ARM_NEON
+const int16x8_t g_table128_NEON[2] =    // columns 0 and 1 of g_table (rows 0-7), every entry multiplied by 128
+{
+    { 2*128,   5*128,   9*128,  13*128,  18*128,  24*128,  33*128,  47*128 },
+    { 8*128,  17*128,  29*128,  42*128,  60*128,  80*128, 106*128, 183*128 }
+};
+
+const int32x4_t g_table256_NEON[4] =    // the same columns times 256 as 32-bit lanes: column 0 rows 0-3, column 1 rows 0-3, column 0 rows 4-7, column 1 rows 4-7
+{
+    {  2*256,   5*256,   9*256,  13*256 },
+    {  8*256,  17*256,  29*256,  42*256 },
+    { 18*256,  24*256,  33*256,  47*256 },
+    { 60*256,  80*256, 106*256, 183*256 }
+};
+
+const int16x8_t g_alpha_NEON[16] =    // literal copy of the g_alpha rows as 16-bit lanes; must be kept in sync with g_alpha by hand
+{
+    { -3, -6,  -9, -15, 2, 5, 8, 14 },
+    { -3, -7, -10, -13, 2, 6, 9, 12 },
+    { -2, -5,  -8, -13, 1, 4, 7, 12 },
+    { -2, -4,  -6, -13, 1, 3, 5, 12 },
+    { -3, -6,  -8, -12, 2, 5, 7, 11 },
+    { -3, -7,  -9, -11, 2, 6, 8, 10 },
+    { -4, -7,  -8, -11, 3, 6, 7, 10 },
+    { -3, -5,  -8, -11, 2, 4, 7, 10 },
+    { -2, -6,  -8, -10, 1, 5, 7,  9 },
+    { -2, -5,  -8, -10, 1, 4, 7,  9 },
+    { -2, -4,  -8, -10, 1, 3, 7,  9 },
+    { -2, -5,  -7, -10, 1, 4, 6,  9 },
+    { -3, -4,  -7, -10, 2, 3, 6,  9 },
+    { -1, -2,  -3, -10, 0, 1, 2,  9 },
+    { -4, -6,  -8,  -9, 3, 5, 7,  8 },
+    { -3, -5,  -7,  -9, 2, 4, 6,  8 }
+};
+
+const int16x8_t g_alphaRange_NEON =    // the six distinct g_alphaRange values (rows with the same span share one), then two zero lanes
+{
+    (int16_t)g_alphaRange[0],
+    (int16_t)g_alphaRange[1],     // same value as rows 2 and 3
+    (int16_t)g_alphaRange[4],
+    (int16_t)g_alphaRange[5],     // same value as rows 6 and 7
+    (int16_t)g_alphaRange[8],     // same value as rows 9-13
+    (int16_t)g_alphaRange[14],    // same value as row 15
+    0,
+    0
+};
+#endif
diff --git a/thirdparty/etcpak/Tables.hpp b/thirdparty/etcpak/Tables.hpp
new file mode 100644
index 0000000000..69d7e8aa07
--- /dev/null
+++ b/thirdparty/etcpak/Tables.hpp
@@ -0,0 +1,49 @@
+#ifndef __TABLES_HPP__
+#define __TABLES_HPP__
+
+#include <stdint.h>
+
+#ifdef __AVX2__
+#  include <immintrin.h>    // __m256i
+#endif
+#ifdef __SSE4_1__
+#  include <smmintrin.h>    // __m128i
+#endif
+#ifdef __ARM_NEON
+#  include <arm_neon.h>     // int16x8_t, int32x4_t
+#endif
+
+extern const int32_t g_table[8][4];       // rows of { a, b, -a, -b }
+extern const int64_t g_table256[8][4];    // g_table entries multiplied by 256
+
+extern const uint32_t g_id[4][16];        // row n holds only the values 2n and 2n+1
+
+extern const uint32_t g_avg2[16];         // entry i is i * 0x11
+
+extern const uint32_t g_flags[64];        // NOTE(review): meaning not documented in Tables.cpp either
+
+extern const int32_t g_alpha[16][8];      // alpha modifier rows
+extern const int32_t g_alphaRange[16];    // 0x100FF / ( 1 + g_alpha[r][7] - g_alpha[r][3] ) per row
+
+#ifdef __SSE4_1__
+extern const __m128i g_table_SIMD[2];       // columns 0 and 1 of g_table as 16-bit lanes
+extern const __m128i g_table128_SIMD[2];    // those columns multiplied by 128
+extern const __m128i g_table256_SIMD[4];    // those columns multiplied by 256, split into 32-bit halves
+
+extern const __m128i g_alpha_SIMD[16];      // one g_alpha row per vector
+extern const __m128i g_alphaRange_SIMD;     // g_alphaRange rows 0, 1, 4, 5, 8, 14 followed by two zero lanes
+#endif
+
+#ifdef __AVX2__
+extern const __m256i g_alpha_AVX[8];        // one g_alpha column per vector
+extern const __m256i g_alphaRange_AVX;      // all 16 g_alphaRange entries
+#endif
+
+#ifdef __ARM_NEON
+extern const int16x8_t g_table128_NEON[2];    // columns 0 and 1 of g_table multiplied by 128
+extern const int32x4_t g_table256_NEON[4];    // those columns multiplied by 256, split into 32-bit halves
+extern const int16x8_t g_alpha_NEON[16];      // one g_alpha row per vector
+extern const int16x8_t g_alphaRange_NEON;     // g_alphaRange rows 0, 1, 4, 5, 8, 14 followed by two zero lanes
+#endif
+
+#endif
diff --git a/thirdparty/etcpak/Vector.hpp b/thirdparty/etcpak/Vector.hpp
new file mode 100644
index 0000000000..3370a88aea
--- /dev/null
+++ b/thirdparty/etcpak/Vector.hpp
@@ -0,0 +1,222 @@
+#ifndef __DARKRL__VECTOR_HPP__
+#define __DARKRL__VECTOR_HPP__
+
+#include <assert.h>
+#include <algorithm>
+#include <math.h>
+#include <stdint.h>
+
+#include "Math.hpp"
+
+template<class T>
+struct Vector2
+{
+    Vector2() : x( 0 ), y( 0 ) {}
+    Vector2( T v ) : x( v ), y( v ) {}
+    Vector2( T _x, T _y ) : x( _x ), y( _y ) {}
+
+    bool operator==( const Vector2<T>& rhs ) const { return x == rhs.x && y == rhs.y; }
+    bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
+
+    Vector2<T>& operator+=( const Vector2<T>& rhs )
+    {
+        x += rhs.x;
+        y += rhs.y;
+        return *this;
+    }
+    Vector2<T>& operator-=( const Vector2<T>& rhs )
+    {
+        x -= rhs.x;
+        y -= rhs.y;
+        return *this;
+    }
+    Vector2<T>& operator*=( const Vector2<T>& rhs )
+    {
+        x *= rhs.x;
+        y *= rhs.y;
+        return *this;
+    }
+
+    T x, y;
+};
+
+template<class T>
+Vector2<T> operator+( const Vector2<T>& lhs, const Vector2<T>& rhs )
+{
+    return Vector2<T>( lhs.x + rhs.x, lhs.y + rhs.y );
+}
+
+template<class T>
+Vector2<T> operator-( const Vector2<T>& lhs, const Vector2<T>& rhs )
+{
+    return Vector2<T>( lhs.x - rhs.x, lhs.y - rhs.y );
+}
+
+template<class T>
+Vector2<T> operator*( const Vector2<T>& lhs, const float& rhs )
+{
+    return Vector2<T>( lhs.x * rhs, lhs.y * rhs );
+}
+
+template<class T>
+Vector2<T> operator/( const Vector2<T>& lhs, const T& rhs )
+{
+    return Vector2<T>( lhs.x / rhs, lhs.y / rhs );
+}
+
+
+typedef Vector2<int32_t> v2i;
+typedef Vector2<float> v2f;
+
+
+template<class T>
+struct Vector3 // three components of type T followed by one padding member
+{
+    Vector3() : x( 0 ), y( 0 ), z( 0 ) {}
+    Vector3( T v ) : x( v ), y( v ), z( v ) {}
+    Vector3( T _x, T _y, T _z ) : x( _x ), y( _y ), z( _z ) {}
+    template<class Y>
+    Vector3( const Vector3<Y>& v ) : x( T( v.x ) ), y( T( v.y ) ), z( T( v.z ) ) {}
+    // Luminance() returns a weighted sum of the components; Clamp() limits each component to [0, 1].
+    T Luminance() const { return T( x * 0.3f + y * 0.59f + z * 0.11f ); }
+    void Clamp()
+    {
+        x = std::min( T(1), std::max( T(0), x ) );
+        y = std::min( T(1), std::max( T(0), y ) );
+        z = std::min( T(1), std::max( T(0), z ) );
+    }
+    // Both comparisons take a Vector3 and look at x, y and z only (padding is ignored).
+    bool operator==( const Vector3<T>& rhs ) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
+    bool operator!=( const Vector3<T>& rhs ) const { return !( *this == rhs ); }
+    // Indexing views the object as an array of T, so it relies on x, y, z being laid out contiguously.
+    T& operator[]( unsigned int idx ) { assert( idx < 3 ); return ((T*)this)[idx]; }
+    const T& operator[]( unsigned int idx ) const { assert( idx < 3 ); return ((const T*)this)[idx]; }
+    // Compound assignments update this vector in place and return a reference to it.
+    Vector3<T>& operator+=( const Vector3<T>& rhs )
+    {
+        x += rhs.x;
+        y += rhs.y;
+        z += rhs.z;
+        return *this;
+    }
+
+    Vector3<T>& operator*=( const Vector3<T>& rhs )
+    {
+        x *= rhs.x;
+        y *= rhs.y;
+        z *= rhs.z;
+        return *this;
+    }
+
+    Vector3<T>& operator*=( const float& rhs )
+    {
+        x *= rhs;
+        y *= rhs;
+        z *= rhs;
+        return *this;
+    }
+
+    T x, y, z;
+    T padding; // not initialised by any of the constructors above
+};
+
+template<class T>
+Vector3<T> operator+( const Vector3<T>& lhs, const Vector3<T>& rhs )
+{
+    return Vector3<T>( lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z );
+}
+
+template<class T>
+Vector3<T> operator-( const Vector3<T>& lhs, const Vector3<T>& rhs )
+{
+    return Vector3<T>( lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z );
+}
+
+template<class T>
+Vector3<T> operator*( const Vector3<T>& lhs, const Vector3<T>& rhs )
+{
+    return Vector3<T>( lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z );
+}
+
+template<class T>
+Vector3<T> operator*( const Vector3<T>& lhs, const float& rhs )
+{
+    return Vector3<T>( T( lhs.x * rhs ), T( lhs.y * rhs ), T( lhs.z * rhs ) );
+}
+
+template<class T>
+Vector3<T> operator/( const Vector3<T>& lhs, const T& rhs )
+{
+    return Vector3<T>( lhs.x / rhs, lhs.y / rhs, lhs.z / rhs );
+}
+
+template<class T>
+bool operator<( const Vector3<T>& lhs, const Vector3<T>& rhs )
+{
+    return lhs.Luminance() < rhs.Luminance();
+}
+
+typedef Vector3<int32_t> v3i;
+typedef Vector3<float> v3f;
+typedef Vector3<uint8_t> v3b;
+
+
+static inline v3b v3f_to_v3b( const v3f& v ) // scales each component by 255; input outside [0, 1] is clamped first
+{
+    return v3b( uint8_t( std::max( 0.f, std::min( 1.f, v.x ) ) * 255 ), uint8_t( std::max( 0.f, std::min( 1.f, v.y ) ) * 255 ), uint8_t( std::max( 0.f, std::min( 1.f, v.z ) ) * 255 ) ); // lower clamp avoids converting a negative float to uint8_t
+}
+
+template<class T>
+Vector3<T> Mix( const Vector3<T>& v1, const Vector3<T>& v2, float amount )
+{
+    return v1 + ( v2 - v1 ) * amount;
+}
+
+template<>
+inline v3b Mix( const v3b& v1, const v3b& v2, float amount )
+{
+    return v3b( v3f( v1 ) + ( v3f( v2 ) - v3f( v1 ) ) * amount );
+}
+
+template<class T>
+Vector3<T> Desaturate( const Vector3<T>& v )
+{
+    T l = v.Luminance();
+    return Vector3<T>( l, l, l );
+}
+
+template<class T>
+Vector3<T> Desaturate( const Vector3<T>& v, float mul )
+{
+    T l = T( v.Luminance() * mul );
+    return Vector3<T>( l, l, l );
+}
+
+template<class T>
+Vector3<T> pow( const Vector3<T>& base, float exponent )
+{
+    return Vector3<T>(
+        pow( base.x, exponent ),
+        pow( base.y, exponent ),
+        pow( base.z, exponent ) );
+}
+
+template<class T>
+Vector3<T> sRGB2linear( const Vector3<T>& v )
+{
+    return Vector3<T>(
+        sRGB2linear( v.x ),
+        sRGB2linear( v.y ),
+        sRGB2linear( v.z ) );
+}
+
+template<class T>
+Vector3<T> linear2sRGB( const Vector3<T>& v )
+{
+    return Vector3<T>(
+        linear2sRGB( v.x ),
+        linear2sRGB( v.y ),
+        linear2sRGB( v.z ) );
+}
+
+#endif
diff --git a/thirdparty/meshoptimizer/LICENSE.md b/thirdparty/meshoptimizer/LICENSE.md
index 4fcd766d22..3c52415f62 100644
--- a/thirdparty/meshoptimizer/LICENSE.md
+++ b/thirdparty/meshoptimizer/LICENSE.md
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2016-2020 Arseny Kapoulkine
+Copyright (c) 2016-2021 Arseny Kapoulkine
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp
index f7d88c5136..f8aad7b49c 100644
--- a/thirdparty/meshoptimizer/clusterizer.cpp
+++ b/thirdparty/meshoptimizer/clusterizer.cpp
@@ -2,6 +2,7 @@
 #include "meshoptimizer.h"
 
 #include <assert.h>
+#include <float.h>
 #include <math.h>
 #include <string.h>
 
@@ -12,6 +13,68 @@
 namespace meshopt
 {
 
+// This must be <= 255 since index 0xff is used internally to indicate a vertex that doesn't belong to a meshlet
+const size_t kMeshletMaxVertices = 255;
+
+// A reasonable limit is around 2*max_vertices or less
+const size_t kMeshletMaxTriangles = 512;
+
+struct TriangleAdjacency2
+{
+	unsigned int* counts;
+	unsigned int* offsets;
+	unsigned int* data;
+};
+
+static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
+{
+	size_t face_count = index_count / 3;
+
+	// allocate arrays
+	adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
+	adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
+	adjacency.data = allocator.allocate<unsigned int>(index_count);
+
+	// fill triangle counts
+	memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
+
+	for (size_t i = 0; i < index_count; ++i)
+	{
+		assert(indices[i] < vertex_count);
+
+		adjacency.counts[indices[i]]++;
+	}
+
+	// fill offset table
+	unsigned int offset = 0;
+
+	for (size_t i = 0; i < vertex_count; ++i)
+	{
+		adjacency.offsets[i] = offset;
+		offset += adjacency.counts[i];
+	}
+
+	assert(offset == index_count);
+
+	// fill triangle data
+	for (size_t i = 0; i < face_count; ++i)
+	{
+		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+
+		adjacency.data[adjacency.offsets[a]++] = unsigned(i);
+		adjacency.data[adjacency.offsets[b]++] = unsigned(i);
+		adjacency.data[adjacency.offsets[c]++] = unsigned(i);
+	}
+
+	// fix offsets that have been disturbed by the previous pass
+	for (size_t i = 0; i < vertex_count; ++i)
+	{
+		assert(adjacency.offsets[i] >= adjacency.counts[i]);
+
+		adjacency.offsets[i] -= adjacency.counts[i];
+	}
+}
+
 static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
 {
 	assert(count > 0);
@@ -82,13 +145,310 @@ static void computeBoundingSphere(float result[4], const float points[][3], size
 	result[3] = radius;
 }
 
+struct Cone
+{
+	float px, py, pz;
+	float nx, ny, nz;
+};
+
+static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius) // combines a distance term and a cone term into one score
+{
+	float cone = 1.f - spread * cone_weight; // decreases as spread * cone_weight increases
+	float cone_clamped = cone < 1e-3f ? 1e-3f : cone; // keep the cone factor at or above 1e-3
+	// distance term: sqrt(distance2) relative to expected_radius, weighted by (1 - cone_weight)
+	return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped;
+}
+
+static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count) // returns a scaled copy of acc; acc itself is not modified
+{
+	Cone result = acc;
+	// divide the accumulated position by triangle_count; a count of 0 scales the position to zero instead
+	float center_scale = triangle_count == 0 ? 0.f : 1.f / float(triangle_count);
+
+	result.px *= center_scale;
+	result.py *= center_scale;
+	result.pz *= center_scale;
+	// scale the accumulated normal to unit length; a zero-length normal is left at zero
+	float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz; // squared length
+	float axis_scale = axis_length == 0.f ? 0.f : 1.f / sqrtf(axis_length);
+
+	result.nx *= axis_scale;
+	result.ny *= axis_scale;
+	result.nz *= axis_scale;
+
+	return result;
+}
+
+static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	(void)vertex_count;
+
+	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+	size_t face_count = index_count / 3;
+
+	float mesh_area = 0;
+
+	for (size_t i = 0; i < face_count; ++i)
+	{
+		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+		assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+		const float* p0 = vertex_positions + vertex_stride_float * a;
+		const float* p1 = vertex_positions + vertex_stride_float * b;
+		const float* p2 = vertex_positions + vertex_stride_float * c;
+
+		float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
+		float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
+
+		float normalx = p10[1] * p20[2] - p10[2] * p20[1];
+		float normaly = p10[2] * p20[0] - p10[0] * p20[2];
+		float normalz = p10[0] * p20[1] - p10[1] * p20[0];
+
+		float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
+		float invarea = (area == 0.f) ? 0.f : 1.f / area;
+
+		triangles[i].px = (p0[0] + p1[0] + p2[0]) / 3.f;
+		triangles[i].py = (p0[1] + p1[1] + p2[1]) / 3.f;
+		triangles[i].pz = (p0[2] + p1[2] + p2[2]) / 3.f;
+
+		triangles[i].nx = normalx * invarea;
+		triangles[i].ny = normaly * invarea;
+		triangles[i].nz = normalz * invarea;
+
+		mesh_area += area;
+	}
+
+	return mesh_area;
+}
+
+static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles) // zero-pads the meshlet's triangle bytes up to a multiple of 4
+{
+	size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3; // first byte past this meshlet's triangle data (3 bytes per triangle)
+	// writes at most 3 bytes, stopping once offset is a multiple of 4
+	// fill 4b padding with 0
+	while (offset & 3)
+		meshlet_triangles[offset++] = 0;
+}
+
+static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles)
+{
+	unsigned char& av = used[a];
+	unsigned char& bv = used[b];
+	unsigned char& cv = used[c];
+
+	bool result = false;
+
+	unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
+
+	if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
+	{
+		meshlets[meshlet_offset] = meshlet;
+
+		for (size_t j = 0; j < meshlet.vertex_count; ++j)
+			used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff;
+
+		finishMeshlet(meshlet, meshlet_triangles);
+
+		meshlet.vertex_offset += meshlet.vertex_count;
+		meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding
+		meshlet.vertex_count = 0;
+		meshlet.triangle_count = 0;
+
+		result = true;
+	}
+
+	if (av == 0xff)
+	{
+		av = (unsigned char)meshlet.vertex_count;
+		meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a;
+	}
+
+	if (bv == 0xff)
+	{
+		bv = (unsigned char)meshlet.vertex_count;
+		meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b;
+	}
+
+	if (cv == 0xff)
+	{
+		cv = (unsigned char)meshlet.vertex_count;
+		meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c;
+	}
+
+	meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av;
+	meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv;
+	meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv;
+	meshlet.triangle_count++;
+
+	return result;
+}
+
+struct KDNode
+{
+	union
+	{
+		float split;
+		unsigned int index;
+	};
+
+	// leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point)
+	// branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children
+	unsigned int axis : 2;
+	unsigned int children : 30;
+};
+
+static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot)
+{
+	size_t m = 0;
+
+	// invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot
+	for (size_t i = 0; i < count; ++i)
+	{
+		float v = points[indices[i] * stride + axis];
+
+		// swap(m, i) unconditionally
+		unsigned int t = indices[m];
+		indices[m] = indices[i];
+		indices[i] = t;
+
+		// when v >= pivot, we swap i with m without advancing it, preserving invariants
+		m += v < pivot;
+	}
+
+	return m;
+}
+
+static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count)
+{
+	assert(offset + count <= node_count);
+	(void)node_count;
+
+	KDNode& result = nodes[offset];
+
+	result.index = indices[0];
+	result.axis = 3;
+	result.children = unsigned(count - 1);
+
+	// all remaining points are stored in nodes immediately following the leaf
+	for (size_t i = 1; i < count; ++i)
+	{
+		KDNode& tail = nodes[offset + i];
+
+		tail.index = indices[i];
+		tail.axis = 3;
+		tail.children = ~0u >> 2; // bogus value to prevent misuse
+	}
+
+	return offset + count;
+}
+
+static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size)
+{
+	assert(count > 0);
+	assert(offset < node_count);
+
+	if (count <= leaf_size)
+		return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
+
+	float mean[3] = {};
+	float vars[3] = {};
+	float runc = 1, runs = 1;
+
+	// gather statistics on the points in the subtree using Welford's algorithm
+	for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc)
+	{
+		const float* point = points + indices[i] * stride;
+
+		for (int k = 0; k < 3; ++k)
+		{
+			float delta = point[k] - mean[k];
+			mean[k] += delta * runs;
+			vars[k] += delta * (point[k] - mean[k]);
+		}
+	}
+
+	// split axis is one where the variance is largest
+	unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1
+	                                                                                      : 2;
+
+	float split = mean[axis];
+	size_t middle = kdtreePartition(indices, count, points, stride, axis, split);
+
+	// when the partition is degenerate simply consolidate the points into a single node
+	if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2)
+		return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
+
+	KDNode& result = nodes[offset];
+
+	result.split = split;
+	result.axis = axis;
+
+	// left subtree is right after our node
+	size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size);
+
+	// distance to the right subtree is represented explicitly
+	result.children = unsigned(next_offset - offset - 1);
+
+	return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size);
+}
+
+static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit)
+{
+	const KDNode& node = nodes[root];
+
+	if (node.axis == 3)
+	{
+		// leaf
+		for (unsigned int i = 0; i <= node.children; ++i)
+		{
+			unsigned int index = nodes[root + i].index;
+
+			if (emitted_flags[index])
+				continue;
+
+			const float* point = points + index * stride;
+
+			float distance2 =
+			    (point[0] - position[0]) * (point[0] - position[0]) +
+			    (point[1] - position[1]) * (point[1] - position[1]) +
+			    (point[2] - position[2]) * (point[2] - position[2]);
+			float distance = sqrtf(distance2);
+
+			if (distance < limit)
+			{
+				result = index;
+				limit = distance;
+			}
+		}
+	}
+	else
+	{
+		// branch; we order recursion to process the node that search position is in first
+		float delta = position[node.axis] - node.split;
+		unsigned int first = (delta <= 0) ? 0 : node.children;
+		unsigned int second = first ^ node.children;
+
+		kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit);
+
+		// only process the other node if it can have a match based on closest distance so far
+		if (fabsf(delta) <= limit)
+			kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit);
+	}
+}
+
 } // namespace meshopt
 
 size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
 {
+	using namespace meshopt;
+
 	assert(index_count % 3 == 0);
-	assert(max_vertices >= 3);
-	assert(max_triangles >= 1);
+	assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+	assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+	assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+
+	(void)kMeshletMaxVertices;
+	(void)kMeshletMaxTriangles;
 
 	// meshlet construction is limited by max vertices and max triangles per meshlet
 	// the worst case is that the input is an unindexed stream since this equally stresses both limits
@@ -100,77 +460,226 @@ size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_
 	return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles;
 }
 
-size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
 {
+	using namespace meshopt;
+
 	assert(index_count % 3 == 0);
-	assert(max_vertices >= 3);
-	assert(max_triangles >= 1);
+	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+
+	assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+	assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+	assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
 
 	meshopt_Allocator allocator;
 
-	meshopt_Meshlet meshlet;
-	memset(&meshlet, 0, sizeof(meshlet));
+	TriangleAdjacency2 adjacency = {};
+	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+
+	unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+	memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+
+	size_t face_count = index_count / 3;
+
+	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+	memset(emitted_flags, 0, face_count);
+
+	// for each triangle, precompute centroid & normal to use for scoring
+	Cone* triangles = allocator.allocate<Cone>(face_count);
+	float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+
+	// assuming each meshlet is a square patch, expected radius is sqrt(expected area)
+	float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f;
+	float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f;
+
+	// build a kd-tree for nearest neighbor lookup
+	unsigned int* kdindices = allocator.allocate<unsigned int>(face_count);
+	for (size_t i = 0; i < face_count; ++i)
+		kdindices[i] = unsigned(i);
 
-	assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0]));
-	assert(max_triangles <= sizeof(meshlet.indices) / 3);
+	KDNode* nodes = allocator.allocate<KDNode>(face_count * 2);
+	kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8);
 
 	// index of the vertex in the meshlet, 0xff if the vertex isn't used
 	unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
 	memset(used, -1, vertex_count);
 
-	size_t offset = 0;
+	meshopt_Meshlet meshlet = {};
+	size_t meshlet_offset = 0;
 
-	for (size_t i = 0; i < index_count; i += 3)
-	{
-		unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
-		assert(a < vertex_count && b < vertex_count && c < vertex_count);
+	Cone meshlet_cone_acc = {};
 
-		unsigned char& av = used[a];
-		unsigned char& bv = used[b];
-		unsigned char& cv = used[c];
+	for (;;)
+	{
+		unsigned int best_triangle = ~0u;
+		unsigned int best_extra = 5;
+		float best_score = FLT_MAX;
 
-		unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
+		Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count);
 
-		if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
+		for (size_t i = 0; i < meshlet.vertex_count; ++i)
 		{
-			destination[offset++] = meshlet;
+			unsigned int index = meshlet_vertices[meshlet.vertex_offset + i];
+
+			unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
+			size_t neighbours_size = adjacency.counts[index];
+
+			for (size_t j = 0; j < neighbours_size; ++j)
+			{
+				unsigned int triangle = neighbours[j];
+				assert(!emitted_flags[triangle]);
+
+				unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
+				assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+				unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff);
+
+				// triangles that don't add new vertices to meshlets are max. priority
+				if (extra != 0)
+				{
+					// artificially increase the priority of dangling triangles as they're expensive to add to new meshlets
+					if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1)
+						extra = 0;
+
+					extra++;
+				}
+
+				// since topology-based priority is always more important than the score, we can skip scoring in some cases
+				if (extra > best_extra)
+					continue;
+
+				const Cone& tri_cone = triangles[triangle];
+
+				float distance2 =
+				    (tri_cone.px - meshlet_cone.px) * (tri_cone.px - meshlet_cone.px) +
+				    (tri_cone.py - meshlet_cone.py) * (tri_cone.py - meshlet_cone.py) +
+				    (tri_cone.pz - meshlet_cone.pz) * (tri_cone.pz - meshlet_cone.pz);
 
-			for (size_t j = 0; j < meshlet.vertex_count; ++j)
-				used[meshlet.vertices[j]] = 0xff;
+				float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz;
 
-			memset(&meshlet, 0, sizeof(meshlet));
+				float score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius);
+
+				// note that topology-based priority is always more important than the score
+				// this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost
+				if (extra < best_extra || score < best_score)
+				{
+					best_triangle = triangle;
+					best_extra = extra;
+					best_score = score;
+				}
+			}
 		}
 
-		if (av == 0xff)
+		if (best_triangle == ~0u)
 		{
-			av = meshlet.vertex_count;
-			meshlet.vertices[meshlet.vertex_count++] = a;
+			float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz};
+			unsigned int index = ~0u;
+			float limit = FLT_MAX;
+
+			kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit);
+
+			best_triangle = index;
 		}
 
-		if (bv == 0xff)
+		if (best_triangle == ~0u)
+			break;
+
+		unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2];
+		assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+		// add meshlet to the output; when the current meshlet is full we reset the accumulated bounds
+		if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles))
 		{
-			bv = meshlet.vertex_count;
-			meshlet.vertices[meshlet.vertex_count++] = b;
+			meshlet_offset++;
+			memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc));
 		}
 
-		if (cv == 0xff)
+		live_triangles[a]--;
+		live_triangles[b]--;
+		live_triangles[c]--;
+
+		// remove emitted triangle from adjacency data
+		// this makes sure that we spend less time traversing these lists on subsequent iterations
+		for (size_t k = 0; k < 3; ++k)
 		{
-			cv = meshlet.vertex_count;
-			meshlet.vertices[meshlet.vertex_count++] = c;
+			unsigned int index = indices[best_triangle * 3 + k];
+
+			unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
+			size_t neighbours_size = adjacency.counts[index];
+
+			for (size_t i = 0; i < neighbours_size; ++i)
+			{
+				unsigned int tri = neighbours[i];
+
+				if (tri == best_triangle)
+				{
+					neighbours[i] = neighbours[neighbours_size - 1];
+					adjacency.counts[index]--;
+					break;
+				}
+			}
 		}
 
-		meshlet.indices[meshlet.triangle_count][0] = av;
-		meshlet.indices[meshlet.triangle_count][1] = bv;
-		meshlet.indices[meshlet.triangle_count][2] = cv;
-		meshlet.triangle_count++;
+		// update aggregated meshlet cone data for scoring subsequent triangles
+		meshlet_cone_acc.px += triangles[best_triangle].px;
+		meshlet_cone_acc.py += triangles[best_triangle].py;
+		meshlet_cone_acc.pz += triangles[best_triangle].pz;
+		meshlet_cone_acc.nx += triangles[best_triangle].nx;
+		meshlet_cone_acc.ny += triangles[best_triangle].ny;
+		meshlet_cone_acc.nz += triangles[best_triangle].nz;
+
+		emitted_flags[best_triangle] = 1;
+	}
+
+	if (meshlet.triangle_count)
+	{
+		finishMeshlet(meshlet, meshlet_triangles);
+
+		meshlets[meshlet_offset++] = meshlet;
+	}
+
+	assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+	return meshlet_offset;
+}
+
+size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+{
+	using namespace meshopt;
+
+	assert(index_count % 3 == 0);
+
+	assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+	assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+	assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+
+	meshopt_Allocator allocator;
+
+	// index of the vertex in the meshlet, 0xff if the vertex isn't used
+	unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
+	memset(used, -1, vertex_count);
+
+	meshopt_Meshlet meshlet = {};
+	size_t meshlet_offset = 0;
+
+	for (size_t i = 0; i < index_count; i += 3)
+	{
+		unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
+		assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+		// appends triangle to the meshlet and writes previous meshlet to the output if full
+		meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles);
 	}
 
 	if (meshlet.triangle_count)
-		destination[offset++] = meshlet;
+	{
+		finishMeshlet(meshlet, meshlet_triangles);
 
-	assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+		meshlets[meshlet_offset++] = meshlet;
+	}
 
-	return offset;
+	assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+	return meshlet_offset;
 }
 
 meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
@@ -178,18 +687,17 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t
 	using namespace meshopt;
 
 	assert(index_count % 3 == 0);
+	assert(index_count / 3 <= kMeshletMaxTriangles);
 	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
 	assert(vertex_positions_stride % sizeof(float) == 0);
 
-	assert(index_count / 3 <= 256);
-
 	(void)vertex_count;
 
 	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
 
 	// compute triangle normals and gather triangle corners
-	float normals[256][3];
-	float corners[256][3][3];
+	float normals[kMeshletMaxTriangles][3];
+	float corners[kMeshletMaxTriangles][3][3];
 	size_t triangles = 0;
 
 	for (size_t i = 0; i < index_count; i += 3)
@@ -327,25 +835,23 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t
 	return bounds;
 }
 
-meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
 {
+	using namespace meshopt;
+
+	assert(triangle_count <= kMeshletMaxTriangles);
 	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
 	assert(vertex_positions_stride % sizeof(float) == 0);
 
-	unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])];
+	unsigned int indices[kMeshletMaxTriangles * 3];
 
-	for (size_t i = 0; i < meshlet->triangle_count; ++i)
+	for (size_t i = 0; i < triangle_count * 3; ++i)
 	{
-		unsigned int a = meshlet->vertices[meshlet->indices[i][0]];
-		unsigned int b = meshlet->vertices[meshlet->indices[i][1]];
-		unsigned int c = meshlet->vertices[meshlet->indices[i][2]];
-
-		assert(a < vertex_count && b < vertex_count && c < vertex_count);
+		unsigned int index = meshlet_vertices[meshlet_triangles[i]];
+		assert(index < vertex_count);
 
-		indices[i * 3 + 0] = a;
-		indices[i * 3 + 1] = b;
-		indices[i * 3 + 2] = c;
+		indices[i] = index;
 	}
 
-	return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
+	return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
 }
diff --git a/thirdparty/meshoptimizer/indexgenerator.cpp b/thirdparty/meshoptimizer/indexgenerator.cpp
index aa4a30efa4..f60db0dc4f 100644
--- a/thirdparty/meshoptimizer/indexgenerator.cpp
+++ b/thirdparty/meshoptimizer/indexgenerator.cpp
@@ -4,6 +4,8 @@
 #include <assert.h>
 #include <string.h>
 
+// This work is based on:
+// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
 namespace meshopt
 {
 
@@ -83,10 +85,49 @@ struct VertexStreamHasher
 	}
 };
 
+struct EdgeHasher // hash/equality functor for directed edges packed as (first << 32) | second, compared through remap
+{
+	const unsigned int* remap; // indexed by vertex index; endpoints with equal remap values are treated as the same vertex
+
+	size_t hash(unsigned long long edge) const
+	{
+		unsigned int e0 = unsigned(edge >> 32); // first endpoint (high 32 bits)
+		unsigned int e1 = unsigned(edge); // second endpoint (low 32 bits)
+
+		unsigned int h1 = remap[e0]; // hash the remapped endpoints so that edges considered equal() hash identically
+		unsigned int h2 = remap[e1];
+
+		const unsigned int m = 0x5bd1e995;
+
+		// MurmurHash64B finalizer
+		h1 ^= h2 >> 18;
+		h1 *= m;
+		h2 ^= h1 >> 22;
+		h2 *= m;
+		h1 ^= h2 >> 17;
+		h1 *= m;
+		h2 ^= h1 >> 19;
+		h2 *= m;
+
+		return h2;
+	}
+
+	bool equal(unsigned long long lhs, unsigned long long rhs) const
+	{
+		unsigned int l0 = unsigned(lhs >> 32);
+		unsigned int l1 = unsigned(lhs);
+
+		unsigned int r0 = unsigned(rhs >> 32);
+		unsigned int r1 = unsigned(rhs);
+
+		return remap[l0] == remap[r0] && remap[l1] == remap[r1]; // order-sensitive: first must match first and second must match second
+	}
+};
+
 static size_t hashBuckets(size_t count)
 {
 	size_t buckets = 1;
-	while (buckets < count)
+	while (buckets < count + count / 4) // smallest power of two >= count + count / 4, so a table holding count keys is at most ~80% full
 		buckets *= 2;
 
 	return buckets;
@@ -119,6 +160,26 @@ static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, c
 	return 0;
 }
 
+static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) // writes remap[i] for every i < vertex_count: the first vertex index that vertex_hasher treats as equal to vertex i
+{
+	VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride}; // NOTE(review): VertexHasher is defined outside this hunk; presumably compares the first 3 floats of each vertex - confirm
+
+	size_t vertex_table_size = hashBuckets(vertex_count);
+	unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size);
+	memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int)); // all bytes 0xff, so every slot starts as ~0u (the empty marker passed to hashLookup)
+
+	for (size_t i = 0; i < vertex_count; ++i)
+	{
+		unsigned int index = unsigned(i);
+		unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u);
+
+		if (*entry == ~0u) // slot still empty: no earlier vertex matched, so this vertex becomes the canonical one
+			*entry = index;
+
+		remap[index] = *entry; // either index itself or the earlier canonical vertex stored in the slot
+	}
+}
+
 } // namespace meshopt
 
 size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
@@ -345,3 +406,146 @@ void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const uns
 		destination[i] = remap[index];
 	}
 }
+
+void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	using namespace meshopt;
+
+	assert(index_count % 3 == 0);
+	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+
+	meshopt_Allocator allocator;
+
+	static const int next[4] = {1, 2, 0, 1}; // next[e] is the corner following e; the 4th entry lets next[e + 1] be read without wrapping
+
+	// build position remap: for each vertex, which other (canonical) vertex does it map to?
+	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+	buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
+
+	// build edge set; this stores all triangle edges but we can look these up by any other wedge
+	EdgeHasher edge_hasher = {remap};
+
+	size_t edge_table_size = hashBuckets(index_count); // the loop below inserts at most one directed edge per input index
+	unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
+	unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size); // parallel to edge_table: slot k holds the vertex opposite to the edge stored in edge_table[k]
+
+	memset(edge_table, -1, edge_table_size * sizeof(unsigned long long)); // all bytes 0xff, so every slot starts as ~0ull (empty)
+	memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int));
+
+	for (size_t i = 0; i < index_count; i += 3)
+	{
+		for (int e = 0; e < 3; ++e)
+		{
+			unsigned int i0 = indices[i + e];
+			unsigned int i1 = indices[i + next[e]];
+			unsigned int i2 = indices[i + next[e + 1]]; // remaining corner of the triangle, i.e. the vertex opposite to edge i0 -> i1
+			assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count);
+
+			unsigned long long edge = ((unsigned long long)i0 << 32) | i1; // directed edge packed as (start << 32) | end
+			unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+			if (*entry == ~0ull) // only the first triangle seen for a given directed edge is recorded
+			{
+				*entry = edge;
+
+				// store vertex opposite to the edge
+				edge_vertex_table[entry - edge_table] = i2;
+			}
+		}
+	}
+
+	// build resulting index buffer: 6 indices for each input triangle
+	for (size_t i = 0; i < index_count; i += 3)
+	{
+		unsigned int patch[6];
+
+		for (int e = 0; e < 3; ++e)
+		{
+			unsigned int i0 = indices[i + e];
+			unsigned int i1 = indices[i + next[e]];
+			assert(i0 < vertex_count && i1 < vertex_count);
+
+			// note: this refers to the opposite edge!
+			unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
+			unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+			patch[e * 2 + 0] = i0;
+			patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table]; // no triangle stores the reversed edge: fall back to i0 itself
+		}
+
+		memcpy(destination + i * 2, patch, sizeof(patch)); // 6 output indices per 3 input indices, hence the i * 2 offset
+	}
+}
+
+void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	using namespace meshopt;
+
+	assert(index_count % 3 == 0);
+	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+
+	meshopt_Allocator allocator;
+
+	static const int next[3] = {1, 2, 0}; // next[e] is the corner following e within the triangle
+
+	// build position remap: for each vertex, which other (canonical) vertex does it map to?
+	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+	buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
+
+	// build edge set; this stores all triangle edges but we can look these up by any other wedge
+	EdgeHasher edge_hasher = {remap};
+
+	size_t edge_table_size = hashBuckets(index_count); // the loop below inserts at most one directed edge per input index
+	unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
+	memset(edge_table, -1, edge_table_size * sizeof(unsigned long long)); // all bytes 0xff, so every slot starts as ~0ull (empty)
+
+	for (size_t i = 0; i < index_count; i += 3)
+	{
+		for (int e = 0; e < 3; ++e)
+		{
+			unsigned int i0 = indices[i + e];
+			unsigned int i1 = indices[i + next[e]];
+			assert(i0 < vertex_count && i1 < vertex_count);
+
+			unsigned long long edge = ((unsigned long long)i0 << 32) | i1; // directed edge packed as (start << 32) | end
+			unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+			if (*entry == ~0ull) // only the first occurrence of a given directed edge is recorded
+				*entry = edge;
+		}
+	}
+
+	// build resulting index buffer: 12 indices for each input triangle
+	for (size_t i = 0; i < index_count; i += 3)
+	{
+		unsigned int patch[12];
+
+		for (int e = 0; e < 3; ++e)
+		{
+			unsigned int i0 = indices[i + e];
+			unsigned int i1 = indices[i + next[e]];
+			assert(i0 < vertex_count && i1 < vertex_count);
+
+			// note: this refers to the opposite edge!
+			unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
+			unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); // stored edge matching (i1, i0) through remap, so its indices may differ from i1/i0; ~0ull if none
+
+			// use the same edge if opposite edge doesn't exist (border)
+			oppe = (oppe == ~0ull) ? edge : oppe;
+
+			// triangle index (0, 1, 2)
+			patch[e] = i0;
+
+			// opposite edge (3, 4; 5, 6; 7, 8)
+			patch[3 + e * 2 + 0] = unsigned(oppe); // low word: end of the stored reversed edge, i.e. the endpoint matching i0
+			patch[3 + e * 2 + 1] = unsigned(oppe >> 32); // high word: start of the stored reversed edge, i.e. the endpoint matching i1
+
+			// dominant vertex (9, 10, 11)
+			patch[9 + e] = remap[i0];
+		}
+
+		memcpy(destination + i * 4, patch, sizeof(patch)); // 12 output indices per 3 input indices, hence the i * 4 offset
+	}
+}
diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h
index 1714000384..fe8d349731 100644
--- a/thirdparty/meshoptimizer/meshoptimizer.h
+++ b/thirdparty/meshoptimizer/meshoptimizer.h
@@ -1,7 +1,7 @@
 /**
- * meshoptimizer - version 0.15
+ * meshoptimizer - version 0.16
  *
- * Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Copyright (C) 2016-2021, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
  * Report bugs and download new versions at https://github.com/zeux/meshoptimizer
  *
  * This library is distributed under the MIT License. See notice at the end of this file.
@@ -12,7 +12,7 @@
 #include <stddef.h>
 
 /* Version macro; major * 1000 + minor * 10 + patch */
-#define MESHOPTIMIZER_VERSION 150 /* 0.15 */
+#define MESHOPTIMIZER_VERSION 160 /* 0.16 */
 
 /* If no API is defined, assume default */
 #ifndef MESHOPTIMIZER_API
@@ -98,6 +98,35 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destinati
 MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
 
 /**
+ * Generate index buffer that can be used as a geometry shader input with triangle adjacency topology
+ * Each triangle is converted into a 6-vertex patch with the following layout:
+ * - 0, 2, 4: original triangle vertices
+ * - 1, 3, 5: vertices adjacent to edges 02, 24 and 40
+ * The resulting patch can be rendered with geometry shaders using e.g. VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY.
+ * This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count*2 elements)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
+ * Generate index buffer that can be used for PN-AEN tessellation with crack-free displacement
+ * Each triangle is converted into a 12-vertex patch with the following layout:
+ * - 0, 1, 2: original triangle vertices
+ * - 3, 4: opposing edge for edge 0, 1
+ * - 5, 6: opposing edge for edge 1, 2
+ * - 7, 8: opposing edge for edge 2, 0
+ * - 9, 10, 11: dominant vertices for corners 0, 1, 2
+ * The resulting patch can be rendered with hardware tessellation using PN-AEN and displacement mapping.
+ * See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count*4 elements)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
  * Vertex transform cache optimizer
  * Reorders indices to reduce the number of GPU vertex shader invocations
  * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually.
@@ -373,22 +402,31 @@ MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetc
 
 struct meshopt_Meshlet
 {
-	unsigned int vertices[64];
-	unsigned char indices[126][3];
-	unsigned char triangle_count;
-	unsigned char vertex_count;
+	/* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */
+	unsigned int vertex_offset;
+	unsigned int triangle_offset;
+
+	/* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */
+	unsigned int vertex_count;
+	unsigned int triangle_count;
 };
 
 /**
  * Experimental: Meshlet builder
  * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer
  * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers.
- * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
+ * When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters.
+ * When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
  *
- * destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
- * max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126)
+ * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
+ * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices
+ * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512)
+ * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency
  */
-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
 MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles);
 
 struct meshopt_Bounds
@@ -426,10 +464,10 @@ struct meshopt_Bounds
  * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable.
  *
  * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
- * index_count should be less than or equal to 256*3 (the function assumes clusters of limited size)
+ * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size)
  */
 MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
-MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 
 /**
  * Experimental: Spatial sorter
@@ -513,6 +551,10 @@ inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices,
 template <typename T>
 inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
 template <typename T>
+inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+template <typename T>
+inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+template <typename T>
 inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count);
 template <typename T>
 inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count);
@@ -547,7 +589,9 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size
 template <typename T>
 inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
 template <typename T>
-inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
+inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
+template <typename T>
+inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
 template <typename T>
 inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 template <typename T>
@@ -762,6 +806,24 @@ inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indi
 }
 
 template <typename T>
+inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	meshopt_IndexAdapter<T> in(0, indices, index_count); /* NOTE(review): meshopt_IndexAdapter is defined outside this hunk; presumably exposes the T indices as unsigned int via .data - confirm */
+	meshopt_IndexAdapter<T> out(destination, 0, index_count * 2); /* output holds 2 indices per input index */
+
+	meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); /* forwards to the unsigned int overload */
+}
+
+template <typename T>
+inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	meshopt_IndexAdapter<T> in(0, indices, index_count); /* NOTE(review): meshopt_IndexAdapter is defined outside this hunk; presumably exposes the T indices as unsigned int via .data - confirm */
+	meshopt_IndexAdapter<T> out(destination, 0, index_count * 4); /* output holds 4 indices per input index */
+
+	meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); /* forwards to the unsigned int overload */
+}
+
+template <typename T>
 inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count)
 {
 	meshopt_IndexAdapter<T> in(0, indices, index_count);
@@ -908,11 +970,19 @@ inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices
 }
 
 template <typename T>
-inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
+{
+	meshopt_IndexAdapter<T> in(0, indices, index_count); /* only the input indices go through the adapter; the meshlet output arrays are passed through unchanged */
+
+	return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight); /* forwards to the unsigned int overload and returns its result */
+}
+
+template <typename T>
+inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
 {
 	meshopt_IndexAdapter<T> in(0, indices, index_count);
 
-	return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles);
+	return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles); /* forwards to the unsigned int overload and returns its result */
 }
 
 template <typename T>
@@ -934,7 +1004,7 @@ inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_
 #endif
 
 /**
- * Copyright (c) 2016-2020 Arseny Kapoulkine
+ * Copyright (c) 2016-2021 Arseny Kapoulkine
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
index 942db14461..b2cb589462 100644
--- a/thirdparty/meshoptimizer/simplifier.cpp
+++ b/thirdparty/meshoptimizer/simplifier.cpp
@@ -131,7 +131,7 @@ struct PositionHasher
 static size_t hashBuckets2(size_t count)
 {
 	size_t buckets = 1;
-	while (buckets < count)
+	while (buckets < count + count / 4) // smallest power of two >= count + count / 4, so a table holding count keys is at most ~80% full
 		buckets *= 2;
 
 	return buckets;
diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp
index 2cbfaac367..5f3ec204ab 100644
--- a/thirdparty/meshoptimizer/vertexcodec.cpp
+++ b/thirdparty/meshoptimizer/vertexcodec.cpp
@@ -710,18 +710,12 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1)
 SIMD_TARGET
 static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1)
 {
-	v128_t mask_0 = wasm_v32x4_shuffle(mask, mask, 0, 2, 1, 3);
-
-	uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull;
-	uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull;
+	// magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00
+	const uint64_t magic = 0x000103070f1f3f80ull;
 
 	// TODO: This can use v8x16_bitmask in the future
-	uint64_t mask_2 = mask_1a | mask_1b;
-	uint64_t mask_4 = mask_2 | (mask_2 >> 16);
-	uint64_t mask_8 = mask_4 | (mask_4 >> 8);
-
-	mask0 = uint8_t(mask_8);
-	mask1 = uint8_t(mask_8 >> 32);
+	mask0 = uint8_t((wasm_i64x2_extract_lane(mask, 0) * magic) >> 56); // top byte of the 64-bit product for lane 0 becomes mask0
+	mask1 = uint8_t((wasm_i64x2_extract_lane(mask, 1) * magic) >> 56); // same computation on lane 1 yields mask1
 }
 
 SIMD_TARGET