11 files changed, 1502 insertions, 130 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 31b19451b3..3b6932b3e1 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -118,7 +118,7 @@ will limit its functionality to IPv4 only.
 ## etcpak
 
 - Upstream: https://github.com/wolfpld/etcpak
-- Version: git (f128369e64a5f4715de8125b325e4fe7debb5194, 2022)
+- Version: 1.0 (a77d5a37ddf48034cee8aeb9e8792a623c265b4c, 2022)
 - License: BSD-3-Clause
 
 Files extracted from upstream source:
@@ -167,6 +167,11 @@ Files extracted from upstream source:
 - `include/` folder, minus the `dlg` subfolder
 - `LICENSE.TXT` and `docs/FTL.TXT`
 
+Some changes have been made in order to prevent LTO from removing code.
+They are marked with `// -- GODOT start --` and `// -- GODOT end --`
+comments. Apply the patches in the `patches/` folder when syncing on newer upstream
+commits.
+
 
 ## glslang
 
@@ -340,7 +345,7 @@ File extracted from upstream release tarball:
 ## meshoptimizer
 
 - Upstream: https://github.com/zeux/meshoptimizer
-- Version: git (8a7d69caa68f778cb559f1879b6beb7987c8c6b7, 2022)
+- Version: git (ea4558d1c0f217f1d67ed7fe0b07896ece88ae18, 2022)
 - License: MIT
 
 Files extracted from upstream repository:
@@ -432,6 +437,15 @@ Collection of single-file libraries used in Godot components.
   * Upstream: https://github.com/Auburn/FastNoiseLite
   * Version: git (6be3d6bf7fb408de341285f9ee8a29b67fd953f1, 2022) + custom changes
   * License: MIT
+- `ok_color.h`
+  * Upstream: https://github.com/bottosson/bottosson.github.io/blob/master/misc/ok_color.h
+  * Version: git (d69831edb90ffdcd08b7e64da3c5405acd48ad2c, 2022)
+  * License: MIT
+  * Modifications: License included in header.
+- `ok_color_shader.h`
+  * Upstream: https://www.shadertoy.com/view/7sK3D1
+  * Version: 2021-09-13
+  * License: MIT
 - `pcg.{cpp,h}`
   * Upstream: http://www.pcg-random.org
   * Version: minimal C implementation, http://www.pcg-random.org/download.html
@@ -695,7 +709,7 @@ Files extracted from upstream source:
 SDK release: https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/master/layers/generated/vk_enum_string_helper.h
 
 `vk_mem_alloc.h` is taken from https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
-Version: 3.0.1-development (2022-03-28), commit `5b598e0a359381d7e2a94149210a1b7642024ae5`
+Version: 3.0.1 (2022-06-10), commit `cfdc0f8775ab3258a3b9c4e47d8ce4b6f52a5441`
 `vk_mem_alloc.cpp` is a Godot file and should be preserved on updates.
 
 Patches in the `patches` directory should be re-applied after updates.
diff --git a/thirdparty/etcpak/AUTHORS.txt b/thirdparty/etcpak/AUTHORS.txt
index e7bae62c85..675b4eb2a9 100644
--- a/thirdparty/etcpak/AUTHORS.txt
+++ b/thirdparty/etcpak/AUTHORS.txt
@@ -1,3 +1,5 @@
 Bartosz Taudul <wolf@nereid.pl>
 Daniel Jungmann <el.3d.source@gmail.com>
 Florian Penzkofer <fp@nullptr.de>
+Jae-Ho Nah <nahjaeho@gmail.com>
+Marcin Ławicki <marcin.lawicki@gmail.com>
diff --git a/thirdparty/etcpak/LICENSE.txt b/thirdparty/etcpak/LICENSE.txt
index 59e85d6ea5..9c71039b9b 100644
--- a/thirdparty/etcpak/LICENSE.txt
+++ b/thirdparty/etcpak/LICENSE.txt
@@ -1,6 +1,6 @@
 etcpak, an extremely fast ETC compression utility (https://github.com/wolfpld/etcpak)
 
-Copyright (c) 2013-2021, Bartosz Taudul <wolf@nereid.pl>
+Copyright (c) 2013-2022, Bartosz Taudul <wolf@nereid.pl>
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/thirdparty/freetype/patches/fix_gcc_lto_build.diff b/thirdparty/freetype/patches/fix_gcc_lto_build.diff
new file mode 100644
index 0000000000..3c22b464c2
--- /dev/null
+++ b/thirdparty/freetype/patches/fix_gcc_lto_build.diff
@@ -0,0 +1,34 @@
+diff --git a/thirdparty/freetype/src/smooth/ftgrays.c b/thirdparty/freetype/src/smooth/ftgrays.c
+index 622035aa79..5d9e1600b7 100644
+--- a/thirdparty/freetype/src/smooth/ftgrays.c
++++ b/thirdparty/freetype/src/smooth/ftgrays.c
+@@ -1907,6 +1907,9 @@ typedef ptrdiff_t  FT_PtrDist;
+     0                                        /* delta    */
+   )
+ 
++// -- GODOT start --
++  static volatile int _lto_dummy = 0;
++// -- GODOT end --
+ 
+   static int
+   gray_convert_glyph_inner( RAS_ARG,
+@@ -1928,6 +1931,9 @@ typedef ptrdiff_t  FT_PtrDist;
+                   ras.max_ey,
+                   ras.cell_null - ras.cell_free,
+                   ras.cell_null - ras.cell_free == 1 ? "" : "s" ));
++// -- GODOT start --
++      _lto_dummy = error; // Prevents LTO from removing this branch.
++// -- GODOT end --
+     }
+     else
+     {
+@@ -1935,6 +1941,9 @@ typedef ptrdiff_t  FT_PtrDist;
+ 
+       FT_TRACE7(( "band [%d..%d]: to be bisected\n",
+                   ras.min_ey, ras.max_ey ));
++// -- GODOT start --
++      _lto_dummy = error; // Prevents LTO from removing this branch.
++// -- GODOT end --
+     }
+ 
+     return error;
diff --git a/thirdparty/freetype/src/smooth/ftgrays.c b/thirdparty/freetype/src/smooth/ftgrays.c
index 622035aa79..5d9e1600b7 100644
--- a/thirdparty/freetype/src/smooth/ftgrays.c
+++ b/thirdparty/freetype/src/smooth/ftgrays.c
@@ -1907,6 +1907,9 @@ typedef ptrdiff_t  FT_PtrDist;
     0                                        /* delta    */
   )
 
+// -- GODOT start --
+  static volatile int _lto_dummy = 0;
+// -- GODOT end --
 
   static int
   gray_convert_glyph_inner( RAS_ARG,
@@ -1928,6 +1931,9 @@ typedef ptrdiff_t  FT_PtrDist;
                   ras.max_ey,
                   ras.cell_null - ras.cell_free,
                   ras.cell_null - ras.cell_free == 1 ? "" : "s" ));
+// -- GODOT start --
+      _lto_dummy = error; // Prevents LTO from removing this branch.
+// -- GODOT end --
     }
     else
     {
@@ -1935,6 +1941,9 @@ typedef ptrdiff_t  FT_PtrDist;
 
       FT_TRACE7(( "band [%d..%d]: to be bisected\n",
                   ras.min_ey, ras.max_ey ));
+// -- GODOT start --
+      _lto_dummy = error; // Prevents LTO from removing this branch.
+// -- GODOT end --
     }
 
     return error;
diff --git a/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch b/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch
index 213f35dd69..21daac6eec 100644
--- a/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch
+++ b/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch
@@ -1,5 +1,5 @@
 diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
-index e384046ffe..ccc99edb1a 100644
+index 5e92e2dc73..e40c141e76 100644
 --- a/thirdparty/meshoptimizer/simplifier.cpp
 +++ b/thirdparty/meshoptimizer/simplifier.cpp
 @@ -20,7 +20,7 @@
@@ -11,7 +11,7 @@ index e384046ffe..ccc99edb1a 100644
  
  // This work is based on:
  // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
-@@ -445,6 +445,7 @@ struct Collapse
+@@ -453,6 +453,7 @@ struct Collapse
  		float error;
  		unsigned int errorui;
  	};
@@ -19,7 +19,7 @@ index e384046ffe..ccc99edb1a 100644
  };
  
  static float normalize(Vector3& v)
-@@ -525,6 +526,34 @@ static float quadricError(const Quadric& Q, const Vector3& v)
+@@ -533,6 +534,34 @@ static float quadricError(const Quadric& Q, const Vector3& v)
  	return fabsf(r) * s;
  }
  
@@ -54,7 +54,7 @@ index e384046ffe..ccc99edb1a 100644
  static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w)
  {
  	float aw = a * w;
-@@ -680,7 +709,7 @@ static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3
+@@ -688,7 +717,7 @@ static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3
  }
  #endif
  
@@ -63,7 +63,7 @@ index e384046ffe..ccc99edb1a 100644
  {
  	for (size_t i = 0; i < index_count; i += 3)
  	{
-@@ -690,6 +719,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
+@@ -698,6 +727,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
  
  		Quadric Q;
  		quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
@@ -73,7 +73,7 @@ index e384046ffe..ccc99edb1a 100644
  
  #if ATTRIBUTES
  		quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w);
-@@ -700,7 +732,7 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
+@@ -708,7 +740,7 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
  	}
  }
  
@@ -82,7 +82,7 @@ index e384046ffe..ccc99edb1a 100644
  {
  	for (size_t i = 0; i < index_count; i += 3)
  	{
-@@ -744,6 +776,9 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
+@@ -752,6 +784,9 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
  
  			quadricAdd(vertex_quadrics[remap[i0]], Q);
  			quadricAdd(vertex_quadrics[remap[i1]], Q);
@@ -92,7 +92,7 @@ index e384046ffe..ccc99edb1a 100644
  		}
  	}
  }
-@@ -848,7 +883,7 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices
+@@ -856,7 +891,7 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices
  	return collapse_count;
  }
  
@@ -101,7 +101,7 @@ index e384046ffe..ccc99edb1a 100644
  {
  	for (size_t i = 0; i < collapse_count; ++i)
  	{
-@@ -868,10 +903,14 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const
+@@ -876,10 +911,14 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const
  		float ei = quadricError(qi, vertex_positions[i1]);
  		float ej = quadricError(qj, vertex_positions[j1]);
  
@@ -116,7 +116,7 @@ index e384046ffe..ccc99edb1a 100644
  	}
  }
  
-@@ -968,7 +1007,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse
+@@ -976,7 +1015,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse
  	}
  }
  
@@ -125,7 +125,7 @@ index e384046ffe..ccc99edb1a 100644
  {
  	size_t edge_collapses = 0;
  	size_t triangle_collapses = 0;
-@@ -1030,6 +1069,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
+@@ -1038,6 +1077,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
  		assert(collapse_remap[r1] == r1);
  
  		quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]);
@@ -133,7 +133,7 @@ index e384046ffe..ccc99edb1a 100644
  
  		if (vertex_kind[i0] == Kind_Complex)
  		{
-@@ -1067,7 +1107,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
+@@ -1075,7 +1115,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
  		triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2;
  		edge_collapses++;
  
@@ -142,7 +142,7 @@ index e384046ffe..ccc99edb1a 100644
  	}
  
  #if TRACE
-@@ -1455,9 +1495,11 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
+@@ -1463,9 +1503,11 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
  
  	Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
  	memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
@@ -156,7 +156,7 @@ index e384046ffe..ccc99edb1a 100644
  
  	if (result != indices)
  		memcpy(result, indices, index_count * sizeof(unsigned int));
-@@ -1488,7 +1530,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
+@@ -1496,7 +1538,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
  		if (edge_collapse_count == 0)
  			break;
  
@@ -165,7 +165,7 @@ index e384046ffe..ccc99edb1a 100644
  
  #if TRACE > 1
  		dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind);
-@@ -1507,7 +1549,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
+@@ -1515,7 +1557,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
  		printf("pass %d: ", int(pass_count++));
  #endif
  
diff --git a/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch
index 51a424765e..33a17fe9fa 100644
--- a/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch
+++ b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch
@@ -15,7 +15,7 @@ index be4b765d97..463fad29da 100644
   * Experimental: Mesh simplifier (sloppy)
   * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance
 diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
-index bf1431269d..e384046ffe 100644
+index a74b08a97d..5e92e2dc73 100644
 --- a/thirdparty/meshoptimizer/simplifier.cpp
 +++ b/thirdparty/meshoptimizer/simplifier.cpp
 @@ -20,6 +20,8 @@
@@ -27,7 +27,7 @@ index bf1431269d..e384046ffe 100644
  // This work is based on:
  // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
  // Michael Garland. Quadric-based polygonal surface simplification. 1999
-@@ -363,6 +365,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
+@@ -371,6 +373,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
  struct Vector3
  {
  	float x, y, z;
@@ -38,7 +38,7 @@ index bf1431269d..e384046ffe 100644
  };
  
  static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
-@@ -419,6 +425,13 @@ struct Quadric
+@@ -427,6 +433,13 @@ struct Quadric
  	float a10, a20, a21;
  	float b0, b1, b2, c;
  	float w;
@@ -52,7 +52,7 @@ index bf1431269d..e384046ffe 100644
  };
  
  struct Collapse
-@@ -461,6 +474,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R)
+@@ -469,6 +482,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R)
  	Q.b2 += R.b2;
  	Q.c += R.c;
  	Q.w += R.w;
@@ -69,7 +69,7 @@ index bf1431269d..e384046ffe 100644
  }
  
  static float quadricError(const Quadric& Q, const Vector3& v)
-@@ -486,6 +509,17 @@ static float quadricError(const Quadric& Q, const Vector3& v)
+@@ -494,6 +517,17 @@ static float quadricError(const Quadric& Q, const Vector3& v)
  	r += ry * v.y;
  	r += rz * v.z;
  
@@ -87,7 +87,7 @@ index bf1431269d..e384046ffe 100644
  	float s = Q.w == 0.f ? 0.f : 1.f / Q.w;
  
  	return fabsf(r) * s;
-@@ -509,6 +543,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo
+@@ -517,6 +551,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo
  	Q.b2 = c * dw;
  	Q.c = d * dw;
  	Q.w = w;
@@ -101,7 +101,7 @@ index bf1431269d..e384046ffe 100644
  }
  
  static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w)
-@@ -561,6 +602,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3
+@@ -569,6 +610,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3
  	quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight);
  }
  
@@ -186,7 +186,7 @@ index bf1431269d..e384046ffe 100644
  static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
  {
  	for (size_t i = 0; i < index_count; i += 3)
-@@ -572,6 +691,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
+@@ -580,6 +699,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
  		Quadric Q;
  		quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
  
@@ -196,7 +196,7 @@ index bf1431269d..e384046ffe 100644
  		quadricAdd(vertex_quadrics[remap[i0]], Q);
  		quadricAdd(vertex_quadrics[remap[i1]], Q);
  		quadricAdd(vertex_quadrics[remap[i2]], Q);
-@@ -1265,13 +1387,19 @@ MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0;
+@@ -1273,13 +1395,19 @@ MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0;
  #endif
  
  size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error)
@@ -218,7 +218,7 @@ index bf1431269d..e384046ffe 100644
  
  	meshopt_Allocator allocator;
  
-@@ -1285,7 +1413,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+@@ -1293,7 +1421,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
  	// build position remap that maps each vertex to the one with identical position
  	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
  	unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count);
@@ -227,7 +227,7 @@ index bf1431269d..e384046ffe 100644
  
  	// classify vertices; vertex kind determines collapse rules, see kCanCollapse
  	unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count);
-@@ -1309,7 +1437,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+@@ -1317,7 +1445,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
  #endif
  
  	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
@@ -250,7 +250,7 @@ index bf1431269d..e384046ffe 100644
  
  	Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
  	memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
-@@ -1401,7 +1543,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+@@ -1409,7 +1551,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
  
  	// result_error is quadratic; we need to remap it back to linear
  	if (out_result_error)
diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
index ccc99edb1a..e40c141e76 100644
--- a/thirdparty/meshoptimizer/simplifier.cpp
+++ b/thirdparty/meshoptimizer/simplifier.cpp
@@ -276,7 +276,15 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
 		{
 			unsigned int target = edges[j].next;
 
-			if (!hasEdge(adjacency, target, vertex))
+			if (target == vertex)
+			{
+				// degenerate triangles have two distinct edges instead of three, and the self edge
+				// is bi-directional by definition; this can break border/seam classification by "closing"
+				// the open edge from another triangle and falsely marking the vertex as manifold.
+				// Instead, we mark the vertex as having >1 open edges, which turns it into locked/complex.
+				openinc[vertex] = openout[vertex] = vertex;
+			}
+			else if (!hasEdge(adjacency, target, vertex))
 			{
 				openinc[target] = (openinc[target] == ~0u) ? vertex : target;
 				openout[vertex] = (openout[vertex] == ~0u) ? target : vertex;
diff --git a/thirdparty/misc/ok_color.h b/thirdparty/misc/ok_color.h
new file mode 100644
index 0000000000..dbc7dafc36
--- /dev/null
+++ b/thirdparty/misc/ok_color.h
@@ -0,0 +1,688 @@
+// Copyright(c) 2021 Björn Ottosson
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files(the "Software"), to deal in
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and /or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef OK_COLOR_H
+#define OK_COLOR_H
+
+#include <cmath>
+#include <cfloat>
+
+class ok_color
+{
+public:
+
+struct Lab { float L; float a; float b; }; // Oklab triple; return type of linear_srgb_to_oklab
+struct RGB { float r; float g; float b; }; // RGB triple; the conversion helpers in this class name it linear sRGB
+struct HSV { float h; float s; float v; }; // presumably hue/saturation/value -- no user in the visible part of the file
+struct HSL { float h; float s; float l; }; // presumably hue/saturation/lightness; parameter type of okhsl_to_srgb
+struct LC { float L; float C; }; // L and C pair; find_cusp returns (L_cusp, C_cusp) in this form
+
+// Alternative representation of (L_cusp, C_cusp)
+// Encoded so S = C_cusp/L_cusp and T = C_cusp/(1-L_cusp) 
+// The maximum value for C in the triangle is then found as fmin(S*L, T*(1-L)), for a given L
+struct ST { float S; float T; };
+
+static constexpr float pi = 3.1415926535897932384626433832795028841971693993751058209749445923078164062f;
+
+float clamp(float x, float min, float max) // Limits x to the range [min, max].
+{
+	if (x < min) // below the range: return the lower bound
+		return min;
+	if (x > max) // above the range: return the upper bound
+		return max;
+
+	return x; // neither comparison fired: x is returned unchanged
+}
+
+float sgn(float x) // Sign of x as a float: 1.f if x > 0, -1.f if x < 0, otherwise 0.f.
+{
+	return (float)(0.f < x) - (float)(x < 0.f); // each comparison contributes 0 or 1
+}
+
+float srgb_transfer_function(float a) // sRGB transfer curve applied to a single channel value a.
+{
+	return .0031308f >= a ? 12.92f * a : 1.055f * powf(a, .4166666666666667f) - .055f; // linear segment at or below .0031308, power segment above
+}
+
+float srgb_transfer_function_inv(float a) // Inverse direction of srgb_transfer_function (per the name) for a single channel value a.
+{
+	return .04045f < a ? powf((a + .055f) / 1.055f, 2.4f) : a / 12.92f; // power segment above .04045, linear segment otherwise
+}
+
+Lab linear_srgb_to_oklab(RGB c) // Converts c (linear sRGB, per the name) to an Oklab triple.
+{
+	float l = 0.4122214708f * c.r + 0.5363325363f * c.g + 0.0514459929f * c.b; // first linear map: three weighted sums of r, g, b
+	float m = 0.2119034982f * c.r + 0.6806995451f * c.g + 0.1073969566f * c.b;
+	float s = 0.0883024619f * c.r + 0.2817188376f * c.g + 0.6299787005f * c.b;
+
+	float l_ = cbrtf(l); // cube root of each intermediate value
+	float m_ = cbrtf(m);
+	float s_ = cbrtf(s);
+
+	return { // second linear map; the three rows initialize Lab.L, Lab.a and Lab.b in that order
+		0.2104542553f * l_ + 0.7936177850f * m_ - 0.0040720468f * s_,
+		1.9779984951f * l_ - 2.4285922050f * m_ + 0.4505937099f * s_,
+		0.0259040371f * l_ + 0.7827717662f * m_ - 0.8086757660f * s_,
+	};
+}
+
+RGB oklab_to_linear_srgb(Lab c) // Converts an Oklab triple c to an RGB triple (linear sRGB, per the name).
+{
+	float l_ = c.L + 0.3963377774f * c.a + 0.2158037573f * c.b; // first linear map: L plus weighted a and b
+	float m_ = c.L - 0.1055613458f * c.a - 0.0638541728f * c.b;
+	float s_ = c.L - 0.0894841775f * c.a - 1.2914855480f * c.b;
+
+	float l = l_ * l_ * l_; // cube each intermediate value
+	float m = m_ * m_ * m_;
+	float s = s_ * s_ * s_;
+
+	return { // second linear map; the three rows initialize RGB.r, RGB.g and RGB.b in that order
+		+4.0767416621f * l - 3.3077115913f * m + 0.2309699292f * s,
+		-1.2684380046f * l + 2.6097574011f * m - 0.3413193965f * s,
+		-0.0041960863f * l - 0.7034186147f * m + 1.7076147010f * s,
+	};
+}
+
+// Finds the maximum saturation possible for a given hue that fits in sRGB and returns it
+// Saturation here is defined as S = C/L
+// a and b must be normalized so a^2 + b^2 == 1
+float compute_max_saturation(float a, float b)
+{
+	// Max saturation will be when one of r, g or b goes below zero.
+
+	// Select different coefficients depending on which component goes below zero first
+	float k0, k1, k2, k3, k4, wl, wm, ws; // k0..k4: polynomial for the initial guess; wl/wm/ws: weights of l, m, s for the selected channel
+
+	if (-1.88170328f * a - 0.80936493f * b > 1)
+	{
+		// Red component
+		k0 = +1.19086277f; k1 = +1.76576728f; k2 = +0.59662641f; k3 = +0.75515197f; k4 = +0.56771245f;
+		wl = +4.0767416621f; wm = -3.3077115913f; ws = +0.2309699292f;
+	}
+	else if (1.81444104f * a - 1.19445276f * b > 1)
+	{
+		// Green component
+		k0 = +0.73956515f; k1 = -0.45954404f; k2 = +0.08285427f; k3 = +0.12541070f; k4 = +0.14503204f;
+		wl = -1.2684380046f; wm = +2.6097574011f; ws = -0.3413193965f;
+	}
+	else
+	{
+		// Blue component
+		k0 = +1.35733652f; k1 = -0.00915799f; k2 = -1.15130210f; k3 = -0.50559606f; k4 = +0.00692167f;
+		wl = -0.0041960863f; wm = -0.7034186147f; ws = +1.7076147010f;
+	}
+
+	// Approximate max saturation using a polynomial:
+	float S = k0 + k1 * a + k2 * b + k3 * a * a + k4 * a * b;
+
+	// Do one step Halley's method to get closer
+	// this gives an error less than 10e-6 (upstream says "10e6", presumably a dropped minus sign), except for some blue hues where the dS/dh is close to infinite
+	// this should be sufficient for most applications, otherwise do two/three steps
+
+	float k_l = +0.3963377774f * a + 0.2158037573f * b; // same a/b coefficients as the l_, m_, s_ rows of oklab_to_linear_srgb
+	float k_m = -0.1055613458f * a - 0.0638541728f * b;
+	float k_s = -0.0894841775f * a - 1.2914855480f * b;
+
+	{
+		float l_ = 1.f + S * k_l; // l_, m_, s_ evaluated at L = 1 with chroma S along (a, b)
+		float m_ = 1.f + S * k_m;
+		float s_ = 1.f + S * k_s;
+
+		float l = l_ * l_ * l_;
+		float m = m_ * m_ * m_;
+		float s = s_ * s_ * s_;
+
+		float l_dS = 3.f * k_l * l_ * l_; // first derivatives of l, m, s with respect to S
+		float m_dS = 3.f * k_m * m_ * m_;
+		float s_dS = 3.f * k_s * s_ * s_;
+
+		float l_dS2 = 6.f * k_l * k_l * l_; // second derivatives of l, m, s with respect to S
+		float m_dS2 = 6.f * k_m * k_m * m_;
+		float s_dS2 = 6.f * k_s * k_s * s_;
+
+		float f = wl * l + wm * m + ws * s; // value of the selected channel at S
+		float f1 = wl * l_dS + wm * m_dS + ws * s_dS; // its first derivative
+		float f2 = wl * l_dS2 + wm * m_dS2 + ws * s_dS2; // its second derivative
+
+		S = S - f * f1 / (f1 * f1 - 0.5f * f * f2); // Halley update toward f(S) == 0
+	}
+
+	return S;
+}
+
+// Finds L_cusp and C_cusp for a given hue and returns them as an LC
+// a and b must be normalized so a^2 + b^2 == 1
+LC find_cusp(float a, float b)
+{
+	// First, find the maximum saturation (saturation S = C/L)
+	float S_cusp = compute_max_saturation(a, b);
+
+	// Convert to linear sRGB to find the first point where at least one of r,g or b >= 1:
+	RGB rgb_at_max = oklab_to_linear_srgb({ 1, S_cusp * a, S_cusp * b }); // evaluated at L = 1 with saturation S_cusp
+	float L_cusp = cbrtf(1.f / fmax(fmax(rgb_at_max.r, rgb_at_max.g), rgb_at_max.b)); // RGB scales with the cube of Lab, so this L brings the largest channel to 1
+	float C_cusp = L_cusp * S_cusp; // C = S * L, from the definition S = C/L
+
+	return { L_cusp , C_cusp };
+}
+
+// Finds the gamut intersection (per the name) of the line defined by
+// L = L0 * (1 - t) + t * L1;
+// C = t * C1;
+// and returns t. a and b must be normalized so a^2 + b^2 == 1; cusp supplies the L and C of the cusp used for the triangle.
+float find_gamut_intersection(float a, float b, float L1, float C1, float L0, LC cusp)
+{
+	// Find the intersection for upper and lower half separately
+	float t;
+	if (((L1 - L0) * cusp.C - (cusp.L - L0) * C1) <= 0.f)
+	{
+		// Lower half: closed-form t, no refinement step and no use of a or b
+
+		t = cusp.C * L0 / (C1 * cusp.L + cusp.C * (L0 - L1));
+	}
+	else
+	{
+		// Upper half
+
+		// First intersect with triangle
+		t = cusp.C * (L0 - 1.f) / (C1 * (cusp.L - 1.f) + cusp.C * (L0 - L1));
+
+		// Then one step Halley's method
+		{
+			float dL = L1 - L0;
+			float dC = C1;
+
+			float k_l = +0.3963377774f * a + 0.2158037573f * b; // same a/b coefficients as the l_, m_, s_ rows of oklab_to_linear_srgb
+			float k_m = -0.1055613458f * a - 0.0638541728f * b;
+			float k_s = -0.0894841775f * a - 1.2914855480f * b;
+
+			float l_dt = dL + dC * k_l; // derivatives of l_, m_, s_ with respect to t
+			float m_dt = dL + dC * k_m;
+			float s_dt = dL + dC * k_s;
+
+			// r, g and b0 below are the three RGB channel expressions minus 1; each gets its own Halley step in t
+			// If higher accuracy is required, 2 or 3 iterations of the following block can be used:
+			{
+				float L = L0 * (1.f - t) + t * L1;
+				float C = t * C1;
+
+				float l_ = L + C * k_l;
+				float m_ = L + C * k_m;
+				float s_ = L + C * k_s;
+
+				float l = l_ * l_ * l_;
+				float m = m_ * m_ * m_;
+				float s = s_ * s_ * s_;
+
+				float ldt = 3 * l_dt * l_ * l_; // first derivatives of l, m, s with respect to t
+				float mdt = 3 * m_dt * m_ * m_;
+				float sdt = 3 * s_dt * s_ * s_;
+
+				float ldt2 = 6 * l_dt * l_dt * l_; // second derivatives of l, m, s with respect to t
+				float mdt2 = 6 * m_dt * m_dt * m_;
+				float sdt2 = 6 * s_dt * s_dt * s_;
+
+				float r = 4.0767416621f * l - 3.3077115913f * m + 0.2309699292f * s - 1;
+				float r1 = 4.0767416621f * ldt - 3.3077115913f * mdt + 0.2309699292f * sdt;
+				float r2 = 4.0767416621f * ldt2 - 3.3077115913f * mdt2 + 0.2309699292f * sdt2;
+
+				float u_r = r1 / (r1 * r1 - 0.5f * r * r2);
+				float t_r = -r * u_r;
+
+				float g = -1.2684380046f * l + 2.6097574011f * m - 0.3413193965f * s - 1;
+				float g1 = -1.2684380046f * ldt + 2.6097574011f * mdt - 0.3413193965f * sdt;
+				float g2 = -1.2684380046f * ldt2 + 2.6097574011f * mdt2 - 0.3413193965f * sdt2;
+
+				float u_g = g1 / (g1 * g1 - 0.5f * g * g2);
+				float t_g = -g * u_g;
+
+				float b0 = -0.0041960863f * l - 0.7034186147f * m + 1.7076147010f * s - 1; // own local: the hue parameter b is no longer overwritten
+				float b1 = -0.0041960863f * ldt - 0.7034186147f * mdt + 1.7076147010f * sdt;
+				float b2 = -0.0041960863f * ldt2 - 0.7034186147f * mdt2 + 1.7076147010f * sdt2;
+
+				float u_b = b1 / (b1 * b1 - 0.5f * b0 * b2);
+				float t_b = -b0 * u_b;
+
+				t_r = u_r >= 0.f ? t_r : FLT_MAX; // a channel whose u is negative is excluded from the minimum
+				t_g = u_g >= 0.f ? t_g : FLT_MAX;
+				t_b = u_b >= 0.f ? t_b : FLT_MAX;
+
+				t += fmin(t_r, fmin(t_g, t_b)); // apply the smallest of the three candidate steps
+			}
+		}
+	}
+
+	return t;
+}
+
+float find_gamut_intersection(float a, float b, float L1, float C1, float L0) // Overload that computes the cusp itself and forwards to the six-argument version.
+{
+	// Find the cusp of the gamut triangle
+	LC cusp = find_cusp(a, b);
+
+	return find_gamut_intersection(a, b, L1, C1, L0, cusp); // returns the same t as the six-argument overload
+}
+
+RGB gamut_clip_preserve_chroma(RGB rgb) // Gamut-clips rgb by moving it in Oklab toward the point with L0 = clamp(L, 0, 1) and C = 0.
+{
+	if (rgb.r < 1 && rgb.g < 1 && rgb.b < 1 && rgb.r > 0 && rgb.g > 0 && rgb.b > 0) // all channels strictly inside (0, 1): returned untouched
+		return rgb;
+
+	Lab lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.L;
+	float eps = 0.00001f;
+	float C = fmax(eps, sqrtf(lab.a * lab.a + lab.b * lab.b)); // length of (a, b), floored at eps so the divisions below never divide by zero
+	float a_ = lab.a / C; // (a_, b_): hue direction passed on as the normalized a/b pair
+	float b_ = lab.b / C;
+
+	float L0 = clamp(L, 0, 1); // target L: the input L limited to [0, 1]
+
+	float t = find_gamut_intersection(a_, b_, L, C, L0); // t along the line from (L0, 0) to (L, C)
+	float L_clipped = L0 * (1 - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb({ L_clipped, C_clipped * a_, C_clipped * b_ });
+}
+
+RGB gamut_clip_project_to_0_5(RGB rgb) // Gamut-clips rgb by moving it in Oklab toward the point with L0 = 0.5 and C = 0.
+{
+	if (rgb.r < 1 && rgb.g < 1 && rgb.b < 1 && rgb.r > 0 && rgb.g > 0 && rgb.b > 0) // all channels strictly inside (0, 1): returned untouched
+		return rgb;
+
+	Lab lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.L;
+	float eps = 0.00001f;
+	float C = fmax(eps, sqrtf(lab.a * lab.a + lab.b * lab.b)); // length of (a, b), floored at eps so the divisions below never divide by zero
+	float a_ = lab.a / C; // (a_, b_): hue direction passed on as the normalized a/b pair
+	float b_ = lab.b / C;
+
+	float L0 = 0.5; // fixed target L
+
+	float t = find_gamut_intersection(a_, b_, L, C, L0); // t along the line from (L0, 0) to (L, C)
+	float L_clipped = L0 * (1 - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb({ L_clipped, C_clipped * a_, C_clipped * b_ });
+}
+
+RGB gamut_clip_project_to_L_cusp(RGB rgb) // Gamut-clips rgb by moving it in Oklab toward the point with L0 = cusp.L (for its hue) and C = 0.
+{
+	if (rgb.r < 1 && rgb.g < 1 && rgb.b < 1 && rgb.r > 0 && rgb.g > 0 && rgb.b > 0) // all channels strictly inside (0, 1): returned untouched
+		return rgb;
+
+	Lab lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.L;
+	float eps = 0.00001f;
+	float C = fmax(eps, sqrtf(lab.a * lab.a + lab.b * lab.b)); // length of (a, b), floored at eps so the divisions below never divide by zero
+	float a_ = lab.a / C; // (a_, b_): hue direction passed on as the normalized a/b pair
+	float b_ = lab.b / C;
+
+	// The cusp is computed here and in find_gamut_intersection, an optimized solution would only compute it once.
+	LC cusp = find_cusp(a_, b_);
+
+	float L0 = cusp.L; // target L: the cusp's L for this hue
+
+	float t = find_gamut_intersection(a_, b_, L, C, L0); // t along the line from (L0, 0) to (L, C)
+
+	float L_clipped = L0 * (1 - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb({ L_clipped, C_clipped * a_, C_clipped * b_ });
+}
+
+RGB gamut_clip_adaptive_L0_0_5(RGB rgb, float alpha = 0.05f) // Gamut-clips rgb toward (L0, C = 0), with L0 derived from L, C and alpha around 0.5.
+{
+	if (rgb.r < 1 && rgb.g < 1 && rgb.b < 1 && rgb.r > 0 && rgb.g > 0 && rgb.b > 0) // all channels strictly inside (0, 1): returned untouched
+		return rgb;
+
+	Lab lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.L;
+	float eps = 0.00001f;
+	float C = fmax(eps, sqrtf(lab.a * lab.a + lab.b * lab.b)); // length of (a, b), floored at eps so the divisions below never divide by zero
+	float a_ = lab.a / C; // (a_, b_): hue direction passed on as the normalized a/b pair
+	float b_ = lab.b / C;
+
+	float Ld = L - 0.5f; // signed offset of L from 0.5
+	float e1 = 0.5f + fabs(Ld) + alpha * C;
+	float L0 = 0.5f * (1.f + sgn(Ld) * (e1 - sqrtf(e1 * e1 - 2.f * fabs(Ld)))); // target L, placed on the same side of 0.5 as L (sign taken from Ld)
+
+	float t = find_gamut_intersection(a_, b_, L, C, L0); // t along the line from (L0, 0) to (L, C)
+	float L_clipped = L0 * (1.f - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb({ L_clipped, C_clipped * a_, C_clipped * b_ });
+}
+
+RGB gamut_clip_adaptive_L0_L_cusp(RGB rgb, float alpha = 0.05f) // Gamut-clips rgb toward (L0, C = 0), with L0 derived from L, C and alpha around cusp.L.
+{
+	if (rgb.r < 1 && rgb.g < 1 && rgb.b < 1 && rgb.r > 0 && rgb.g > 0 && rgb.b > 0) // all channels strictly inside (0, 1): returned untouched
+		return rgb;
+
+	Lab lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.L;
+	float eps = 0.00001f;
+	float C = fmax(eps, sqrtf(lab.a * lab.a + lab.b * lab.b)); // length of (a, b), floored at eps so the divisions below never divide by zero
+	float a_ = lab.a / C; // (a_, b_): hue direction passed on as the normalized a/b pair
+	float b_ = lab.b / C;
+
+	// The cusp is computed here and in find_gamut_intersection, an optimized solution would only compute it once.
+	LC cusp = find_cusp(a_, b_);
+
+	float Ld = L - cusp.L; // signed offset of L from the cusp's L
+	float k = 2.f * (Ld > 0 ? 1.f - cusp.L : cusp.L); // twice the distance from cusp.L to 1 (when L is above the cusp) or to 0 (otherwise)
+
+	float e1 = 0.5f * k + fabs(Ld) + alpha * C / k;
+	float L0 = cusp.L + 0.5f * (sgn(Ld) * (e1 - sqrtf(e1 * e1 - 2.f * k * fabs(Ld)))); // target L, offset from cusp.L toward L (sign taken from Ld)
+
+	float t = find_gamut_intersection(a_, b_, L, C, L0); // t along the line from (L0, 0) to (L, C)
+	float L_clipped = L0 * (1.f - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb({ L_clipped, C_clipped * a_, C_clipped * b_ });
+}
+
+float toe(float x) // Scalar remapping of x; algebraically the inverse of toe_inv (same k_1, k_2, k_3).
+{
+	constexpr float k_1 = 0.206f;
+	constexpr float k_2 = 0.03f;
+	constexpr float k_3 = (1.f + k_1) / (1.f + k_2);
+	return 0.5f * (k_3 * x - k_1 + sqrtf((k_3 * x - k_1) * (k_3 * x - k_1) + 4 * k_2 * k_3 * x)); // root of y^2 + k_1 * y == k_3 * x * (y + k_2) taken with the + sign
+}
+
+float toe_inv(float x) // Algebraic inverse of toe: a rational function of x built from the same constants.
+{
+	constexpr float k_1 = 0.206f;
+	constexpr float k_2 = 0.03f;
+	constexpr float k_3 = (1.f + k_1) / (1.f + k_2);
+	return (x * x + k_1 * x) / (k_3 * (x + k_2)); // denominator is zero at x == -k_2
+}
+
+ST to_ST(LC cusp)
+{
+	const float S = cusp.C / cusp.L; // chroma over lightness
+	const float T = cusp.C / (1 - cusp.L); // chroma over the remaining lightness (1 - L)
+	return { S, T };
+}
+
+// Smooth approximation of the cusp location, returned as an (S, T) pair.
+// The polynomial coefficients were produced by an optimization process and are
+// designed so that S_mid < S_max and T_mid < T_max.
+ST get_ST_mid(float a_, float b_)
+{
+	// Denominator of the S term: polynomial in a_ and b_, nested in a_.
+	const float S_denominator =
+		+7.44778970f + 4.15901240f * b_
+		+ a_ * (-2.19557347f + 1.75198401f * b_
+			+ a_ * (-2.13704948f - 10.02301043f * b_
+				+ a_ * (-4.24894561f + 5.38770819f * b_ + 4.69891013f * a_)));
+
+	// Denominator of the T term, same structure with its own coefficients.
+	const float T_denominator =
+		+1.61320320f - 0.68124379f * b_
+		+ a_ * (+0.40370612f + 0.90148123f * b_
+			+ a_ * (-0.27087943f + 0.61223990f * b_
+				+ a_ * (+0.00299215f - 0.45399568f * b_ - 0.14661872f * a_)));
+
+	const float S = 0.11516993f + 1.f / S_denominator;
+	const float T = 0.11239642f + 1.f / T_denominator;
+	return { S, T };
+}
+
+struct Cs { float C_0; float C_mid; float C_max; };
+// Returns three reference chroma values for lightness L and the hue direction (a_, b_):
+// C_0 (hue independent), C_mid (smooth, from get_ST_mid) and C_max (gamut boundary).
+Cs get_Cs(float L, float a_, float b_)
+{
+	LC cusp = find_cusp(a_, b_);
+	// With C1 == 1 the intersection parameter t is itself the chroma where the gamut boundary is hit.
+	const float C_max = find_gamut_intersection(a_, b_, L, 1, L, cusp);
+	const ST ST_max = to_ST(cusp);
+
+	// Scale factor to compensate for the curved part of the gamut shape.
+	const float k = C_max / fmin((L * ST_max.S), (1 - L) * ST_max.T);
+
+	// C_mid: a soft minimum of the two triangle edges (instead of a sharp
+	// triangle shape) keeps the chroma value smooth.
+	const ST ST_mid = get_ST_mid(a_, b_);
+	const float mid_a = L * ST_mid.S;
+	const float mid_b = (1.f - L) * ST_mid.T;
+	const float mid_a4 = mid_a * mid_a * mid_a * mid_a;
+	const float mid_b4 = mid_b * mid_b * mid_b * mid_b;
+	const float C_mid = 0.9f * k * sqrtf(sqrtf(1.f / (1.f / mid_a4 + 1.f / mid_b4)));
+
+	// C_0: the shape does not depend on hue here, so S and T are the constants
+	// 0.4 and 0.8, picked to roughly be the average values of S and T.
+	// The same soft minimum is used, with squares instead of fourth powers.
+	const float zero_a = L * 0.4f;
+	const float zero_b = (1.f - L) * 0.8f;
+	const float zero_a2 = zero_a * zero_a;
+	const float zero_b2 = zero_b * zero_b;
+	const float C_0 = sqrtf(1.f / (1.f / zero_a2 + 1.f / zero_b2));
+
+	return { C_0, C_mid, C_max };
+}
+
+// Converts an OKHSL color to sRGB.
+// The hue h is multiplied by 2 * pi to get the direction (a_, b_) in the Oklab a/b plane,
+// l is mapped to Oklab lightness with toe_inv, and s in [0, 1] selects a chroma between 0 and C_max.
+RGB okhsl_to_srgb(HSL hsl)
+{
+	const float hue = hsl.h;
+	const float saturation = hsl.s;
+	const float lightness = hsl.l;
+
+	// Lightness 1 and 0 map straight to white and black, whatever the hue and saturation are.
+	if (lightness == 1.0f)
+	{
+		return { 1.f, 1.f, 1.f };
+	}
+	if (lightness == 0.f)
+	{
+		return { 0.f, 0.f, 0.f };
+	}
+
+	const float a_ = cosf(2.f * pi * hue);
+	const float b_ = sinf(2.f * pi * hue);
+	const float L = toe_inv(lightness);
+
+	const Cs cs = get_Cs(L, a_, b_);
+
+	// The saturation scale is made of two pieces that meet at s == mid, where C == C_mid.
+	const float mid = 0.8f;
+	const float mid_inv = 1.25f;
+
+	float C;
+	if (saturation < mid)
+	{
+		// Lower piece: C runs from 0 at s == 0 to C_mid at s == mid.
+		const float t = mid_inv * saturation;
+		const float k_1 = mid * cs.C_0;
+		const float k_2 = (1.f - k_1 / cs.C_mid);
+
+		C = t * k_1 / (1.f - k_2 * t);
+	}
+	else
+	{
+		// Upper piece: C runs from C_mid at s == mid to C_max at s == 1.
+		const float t = (saturation - mid) / (1 - mid);
+		const float k_0 = cs.C_mid;
+		const float k_1 = (1.f - mid) * cs.C_mid * cs.C_mid * mid_inv * mid_inv / cs.C_0;
+		const float k_2 = (1.f - (k_1) / (cs.C_max - cs.C_mid));
+
+		C = k_0 + t * k_1 / (1.f - k_2 * t);
+	}
+
+	// Back to linear sRGB first, then apply the sRGB transfer function per channel.
+	const RGB linear = oklab_to_linear_srgb({ L, C * a_, C * b_ });
+	return {
+		srgb_transfer_function(linear.r),
+		srgb_transfer_function(linear.g),
+		srgb_transfer_function(linear.b),
+	};
+}
+
+// Converts an sRGB color to OKHSL (inverse of okhsl_to_srgb). Colors with zero chroma, or with an
+// Oklab lightness of exactly 0 or 1, are reported with saturation 0 instead of NaN.
+HSL srgb_to_okhsl(RGB rgb)
+{
+	Lab lab = linear_srgb_to_oklab({
+		srgb_transfer_function_inv(rgb.r),
+		srgb_transfer_function_inv(rgb.g),
+		srgb_transfer_function_inv(rgb.b)
+		});
+
+	float C = sqrtf(lab.a * lab.a + lab.b * lab.b);
+	float L = lab.L;
+	float h = 0.5f + 0.5f * atan2f(-lab.b, -lab.a) / pi;
+	float l = toe(L);
+
+	// C == 0 makes the hue normalization below 0 / 0, and L == 0 or L == 1 makes get_Cs divide by
+	// zero. okhsl_to_srgb special-cases l == 0 and l == 1 as well; saturation 0 is reported here.
+	if (C == 0.f || L == 0.f || L == 1.f)
+		return { h, 0.f, l };
+
+	float a_ = lab.a / C;
+	float b_ = lab.b / C;
+	Cs cs = get_Cs(L, a_, b_);
+
+	// Inverse of the interpolation in okhsl_to_srgb:
+	float mid = 0.8f;
+	float mid_inv = 1.25f;
+
+	float s;
+	if (C < cs.C_mid)
+	{
+		float k_1 = mid * cs.C_0;
+		float k_2 = (1.f - k_1 / cs.C_mid);
+		float t = C / (k_1 + k_2 * C);
+		s = t * mid;
+	}
+	else
+	{
+		float k_0 = cs.C_mid;
+		float k_1 = (1.f - mid) * cs.C_mid * cs.C_mid * mid_inv * mid_inv / cs.C_0;
+		float k_2 = (1.f - (k_1) / (cs.C_max - cs.C_mid));
+		float t = (C - k_0) / (k_1 + k_2 * (C - k_0));
+		s = mid + (1.f - mid) * t;
+	}
+
+	return { h, s, l };
+}
+
+
+// Converts an OKHSV color to sRGB. A value of exactly 0 returns black right away, because the
+// toe compensation further down would otherwise compute 0 / 0 and make every channel NaN.
+RGB okhsv_to_srgb(HSV hsv)
+{
+	float h = hsv.h;
+	float s = hsv.s;
+	float v = hsv.v;
+
+	if (v == 0.f)
+		return { 0.f, 0.f, 0.f };
+
+	float a_ = cosf(2.f * pi * h);
+	float b_ = sinf(2.f * pi * h);
+
+	LC cusp = find_cusp(a_, b_);
+	ST ST_max = to_ST(cusp);
+	float S_max = ST_max.S;
+	float T_max = ST_max.T;
+	float S_0 = 0.5f;
+	float k = 1 - S_0 / S_max;
+
+	// first we compute L and V as if the gamut is a perfect triangle:
+	// L, C when v==1:
+	float L_v = 1 - s * S_0 / (S_0 + T_max - T_max * k * s);
+	float C_v = s * T_max * S_0 / (S_0 + T_max - T_max * k * s);
+
+	float L = v * L_v;
+	float C = v * C_v;
+
+	// then we compensate for both toe and the curved top part of the triangle:
+	float L_vt = toe_inv(L_v);
+	float C_vt = C_v * L_vt / L_v;
+
+	float L_new = toe_inv(L);
+	C = C * L_new / L; // would be 0 / 0 for L == 0, hence the early return for v == 0
+	L = L_new;
+
+	RGB rgb_scale = oklab_to_linear_srgb({ L_vt, a_ * C_vt, b_ * C_vt });
+	float scale_L = cbrtf(1.f / fmax(fmax(rgb_scale.r, rgb_scale.g), fmax(rgb_scale.b, 0.f)));
+
+	L = L * scale_L;
+	C = C * scale_L;
+
+	RGB rgb = oklab_to_linear_srgb({ L, C * a_, C * b_ });
+	return { srgb_transfer_function(rgb.r), srgb_transfer_function(rgb.g), srgb_transfer_function(rgb.b) };
+}
+
+// Converts an sRGB color to OKHSV. Colors with zero chroma (black included) skip the hue
+// normalization, which would be 0 / 0, and are returned with saturation 0 and value toe(L).
+HSV srgb_to_okhsv(RGB rgb)
+{
+	Lab lab = linear_srgb_to_oklab({
+		srgb_transfer_function_inv(rgb.r),
+		srgb_transfer_function_inv(rgb.g),
+		srgb_transfer_function_inv(rgb.b)
+		});
+
+	float C = sqrtf(lab.a * lab.a + lab.b * lab.b);
+	float L = lab.L;
+	float h = 0.5f + 0.5f * atan2f(-lab.b, -lab.a) / pi;
+
+	// For C -> 0 the formulas below tend to s = 0 and v = toe(L), so return that limit directly.
+	if (C == 0.f)
+		return { h, 0.f, toe(L) };
+
+	float a_ = lab.a / C;
+	float b_ = lab.b / C;
+
+	LC cusp = find_cusp(a_, b_);
+	ST ST_max = to_ST(cusp);
+	float S_max = ST_max.S;
+	float T_max = ST_max.T;
+	float S_0 = 0.5f;
+	float k = 1 - S_0 / S_max;
+
+	// first we find L_v, C_v, L_vt and C_vt
+	float t = T_max / (C + L * T_max);
+	float L_v = t * L;
+	float C_v = t * C;
+	float L_vt = toe_inv(L_v);
+	float C_vt = C_v * L_vt / L_v;
+
+	// we can then use these to invert the step that compensates for the toe and the curved top part of the triangle:
+	RGB rgb_scale = oklab_to_linear_srgb({ L_vt, a_ * C_vt, b_ * C_vt });
+	float scale_L = cbrtf(1.f / fmax(fmax(rgb_scale.r, rgb_scale.g), fmax(rgb_scale.b, 0.f)));
+	L = L / scale_L;
+	C = C / scale_L;
+	C = C * toe(L) / L;
+	L = toe(L);
+
+	// we can now compute v and s:
+	float v = L / L_v;
+	float s = (S_0 + T_max) * C_v / ((T_max * S_0) + T_max * k * C_v);
+	return { h, s, v };
+}
+
+};
+#endif // OK_COLOR_H
diff --git a/thirdparty/misc/ok_color_shader.h b/thirdparty/misc/ok_color_shader.h
new file mode 100644
index 0000000000..40d83366ee
--- /dev/null
+++ b/thirdparty/misc/ok_color_shader.h
@@ -0,0 +1,663 @@
+// Copyright(c) 2021 Björn Ottosson
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files(the "Software"), to deal in
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and /or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef OK_COLOR_SHADER_H
+#define OK_COLOR_SHADER_H
+
+#include "core/string/ustring.h"
+
+static String OK_COLOR_SHADER = R"(shader_type canvas_item;
+
+const float M_PI = 3.1415926535897932384626433832795;
+
+float cbrt( float x )
+{
+    return sign(x)*pow(abs(x),1.0f/3.0f);
+}
+
+float srgb_transfer_function(float a)
+{
+	return .0031308f >= a ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f;
+}
+
+float srgb_transfer_function_inv(float a)
+{
+	return .04045f < a ? pow((a + .055f) / 1.055f, 2.4f) : a / 12.92f;
+}
+
+vec3 linear_srgb_to_oklab(vec3 c)
+{
+	float l = 0.4122214708f * c.r + 0.5363325363f * c.g + 0.0514459929f * c.b;
+	float m = 0.2119034982f * c.r + 0.6806995451f * c.g + 0.1073969566f * c.b;
+	float s = 0.0883024619f * c.r + 0.2817188376f * c.g + 0.6299787005f * c.b;
+
+	float l_ = cbrt(l);
+	float m_ = cbrt(m);
+	float s_ = cbrt(s);
+
+	return vec3(
+		0.2104542553f * l_ + 0.7936177850f * m_ - 0.0040720468f * s_,
+		1.9779984951f * l_ - 2.4285922050f * m_ + 0.4505937099f * s_,
+		0.0259040371f * l_ + 0.7827717662f * m_ - 0.8086757660f * s_
+	);
+}
+
+vec3 oklab_to_linear_srgb(vec3 c)
+{
+	float l_ = c.x + 0.3963377774f * c.y + 0.2158037573f * c.z;
+	float m_ = c.x - 0.1055613458f * c.y - 0.0638541728f * c.z;
+	float s_ = c.x - 0.0894841775f * c.y - 1.2914855480f * c.z;
+
+	float l = l_ * l_ * l_;
+	float m = m_ * m_ * m_;
+	float s = s_ * s_ * s_;
+
+	return vec3(
+		+4.0767416621f * l - 3.3077115913f * m + 0.2309699292f * s,
+		-1.2684380046f * l + 2.6097574011f * m - 0.3413193965f * s,
+		-0.0041960863f * l - 0.7034186147f * m + 1.7076147010f * s
+	);
+}
+
+// Finds the maximum saturation possible for a given hue that fits in sRGB
+// Saturation here is defined as S = C/L
+// a and b must be normalized so a^2 + b^2 == 1
+float compute_max_saturation(float a, float b)
+{
+	// Max saturation will be when one of r, g or b goes below zero.
+
+	// Select different coefficients depending on which component goes below zero first
+	float k0, k1, k2, k3, k4, wl, wm, ws;
+
+	if (-1.88170328f * a - 0.80936493f * b > 1.f)
+	{
+		// Red component
+		k0 = +1.19086277f; k1 = +1.76576728f; k2 = +0.59662641f; k3 = +0.75515197f; k4 = +0.56771245f;
+		wl = +4.0767416621f; wm = -3.3077115913f; ws = +0.2309699292f;
+	}
+	else if (1.81444104f * a - 1.19445276f * b > 1.f)
+	{
+		// Green component
+		k0 = +0.73956515f; k1 = -0.45954404f; k2 = +0.08285427f; k3 = +0.12541070f; k4 = +0.14503204f;
+		wl = -1.2684380046f; wm = +2.6097574011f; ws = -0.3413193965f;
+	}
+	else
+	{
+		// Blue component
+		k0 = +1.35733652f; k1 = -0.00915799f; k2 = -1.15130210f; k3 = -0.50559606f; k4 = +0.00692167f;
+		wl = -0.0041960863f; wm = -0.7034186147f; ws = +1.7076147010f;
+	}
+
+	// Approximate max saturation using a polynomial:
+	float S = k0 + k1 * a + k2 * b + k3 * a * a + k4 * a * b;
+
+	// Do one step Halley's method to get closer
+	// this gives an error less than 10e6, except for some blue hues where the dS/dh is close to infinite
+	// this should be sufficient for most applications, otherwise do two/three steps 
+
+	float k_l = +0.3963377774f * a + 0.2158037573f * b;
+	float k_m = -0.1055613458f * a - 0.0638541728f * b;
+	float k_s = -0.0894841775f * a - 1.2914855480f * b;
+
+	{
+		float l_ = 1.f + S * k_l;
+		float m_ = 1.f + S * k_m;
+		float s_ = 1.f + S * k_s;
+
+		float l = l_ * l_ * l_;
+		float m = m_ * m_ * m_;
+		float s = s_ * s_ * s_;
+
+		float l_dS = 3.f * k_l * l_ * l_;
+		float m_dS = 3.f * k_m * m_ * m_;
+		float s_dS = 3.f * k_s * s_ * s_;
+
+		float l_dS2 = 6.f * k_l * k_l * l_;
+		float m_dS2 = 6.f * k_m * k_m * m_;
+		float s_dS2 = 6.f * k_s * k_s * s_;
+
+		float f = wl * l + wm * m + ws * s;
+		float f1 = wl * l_dS + wm * m_dS + ws * s_dS;
+		float f2 = wl * l_dS2 + wm * m_dS2 + ws * s_dS2;
+
+		S = S - f * f1 / (f1 * f1 - 0.5f * f * f2);
+	}
+
+	return S;
+}
+
+// finds L_cusp and C_cusp for a given hue
+// a and b must be normalized so a^2 + b^2 == 1
+vec2 find_cusp(float a, float b)
+{
+	// First, find the maximum saturation (saturation S = C/L)
+	float S_cusp = compute_max_saturation(a, b);
+
+	// Convert to linear sRGB to find the first point where at least one of r,g or b >= 1:
+	vec3 rgb_at_max = oklab_to_linear_srgb(vec3( 1, S_cusp * a, S_cusp * b ));
+	float L_cusp = cbrt(1.f / max(max(rgb_at_max.r, rgb_at_max.g), rgb_at_max.b));
+	float C_cusp = L_cusp * S_cusp;
+
+	return vec2( L_cusp , C_cusp );
+} )"
+R"(// Finds intersection of the line defined by 
+// L = L0 * (1 - t) + t * L1;
+// C = t * C1;
+// a and b must be normalized so a^2 + b^2 == 1
+float find_gamut_intersection(float a, float b, float L1, float C1, float L0, vec2 cusp)
+{
+	// Find the intersection for upper and lower half separately
+	float t;
+	if (((L1 - L0) * cusp.y - (cusp.x - L0) * C1) <= 0.f)
+	{
+		// Lower half
+
+		t = cusp.y * L0 / (C1 * cusp.x + cusp.y * (L0 - L1));
+	}
+	else
+	{
+		// Upper half
+
+		// First intersect with triangle
+		t = cusp.y * (L0 - 1.f) / (C1 * (cusp.x - 1.f) + cusp.y * (L0 - L1));
+
+		// Then one step Halley's method
+		{
+			float dL = L1 - L0;
+			float dC = C1;
+
+			float k_l = +0.3963377774f * a + 0.2158037573f * b;
+			float k_m = -0.1055613458f * a - 0.0638541728f * b;
+			float k_s = -0.0894841775f * a - 1.2914855480f * b;
+
+			float l_dt = dL + dC * k_l;
+			float m_dt = dL + dC * k_m;
+			float s_dt = dL + dC * k_s;
+
+
+			// If higher accuracy is required, 2 or 3 iterations of the following block can be used:
+			{
+				float L = L0 * (1.f - t) + t * L1;
+				float C = t * C1;
+
+				float l_ = L + C * k_l;
+				float m_ = L + C * k_m;
+				float s_ = L + C * k_s;
+
+				float l = l_ * l_ * l_;
+				float m = m_ * m_ * m_;
+				float s = s_ * s_ * s_;
+
+				float ldt = 3.f * l_dt * l_ * l_;
+				float mdt = 3.f * m_dt * m_ * m_;
+				float sdt = 3.f * s_dt * s_ * s_;
+
+				float ldt2 = 6.f * l_dt * l_dt * l_;
+				float mdt2 = 6.f * m_dt * m_dt * m_;
+				float sdt2 = 6.f * s_dt * s_dt * s_;
+
+				float r = 4.0767416621f * l - 3.3077115913f * m + 0.2309699292f * s - 1.f;
+				float r1 = 4.0767416621f * ldt - 3.3077115913f * mdt + 0.2309699292f * sdt;
+				float r2 = 4.0767416621f * ldt2 - 3.3077115913f * mdt2 + 0.2309699292f * sdt2;
+
+				float u_r = r1 / (r1 * r1 - 0.5f * r * r2);
+				float t_r = -r * u_r;
+
+				float g = -1.2684380046f * l + 2.6097574011f * m - 0.3413193965f * s - 1.f;
+				float g1 = -1.2684380046f * ldt + 2.6097574011f * mdt - 0.3413193965f * sdt;
+				float g2 = -1.2684380046f * ldt2 + 2.6097574011f * mdt2 - 0.3413193965f * sdt2;
+
+				float u_g = g1 / (g1 * g1 - 0.5f * g * g2);
+				float t_g = -g * u_g;
+
+				float b = -0.0041960863f * l - 0.7034186147f * m + 1.7076147010f * s - 1.f;
+				float b1 = -0.0041960863f * ldt - 0.7034186147f * mdt + 1.7076147010f * sdt;
+				float b2 = -0.0041960863f * ldt2 - 0.7034186147f * mdt2 + 1.7076147010f * sdt2;
+
+				float u_b = b1 / (b1 * b1 - 0.5f * b * b2);
+				float t_b = -b * u_b;
+
+				t_r = u_r >= 0.f ? t_r : 10000.f;
+				t_g = u_g >= 0.f ? t_g : 10000.f;
+				t_b = u_b >= 0.f ? t_b : 10000.f;
+
+				t += min(t_r, min(t_g, t_b));
+			}
+		}
+	}
+
+	return t;
+}
+
+float find_gamut_intersection_5(float a, float b, float L1, float C1, float L0)
+{
+	// Find the cusp of the gamut triangle
+	vec2 cusp = find_cusp(a, b);
+
+	return find_gamut_intersection(a, b, L1, C1, L0, cusp);
+})"
+R"(
+
+vec3 gamut_clip_preserve_chroma(vec3 rgb)
+{
+	if (rgb.r < 1.f && rgb.g < 1.f && rgb.b < 1.f && rgb.r > 0.f && rgb.g > 0.f && rgb.b > 0.f)
+		return rgb;
+
+	vec3 lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.x;
+	float eps = 0.00001f;
+	float C = max(eps, sqrt(lab.y * lab.y + lab.z * lab.z));
+	float a_ = lab.y / C;
+	float b_ = lab.z / C;
+
+	float L0 = clamp(L, 0.f, 1.f);
+
+	float t = find_gamut_intersection_5(a_, b_, L, C, L0);
+	float L_clipped = L0 * (1.f - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb(vec3( L_clipped, C_clipped * a_, C_clipped * b_ ));
+}
+
+vec3 gamut_clip_project_to_0_5(vec3 rgb)
+{
+	if (rgb.r < 1.f && rgb.g < 1.f && rgb.b < 1.f && rgb.r > 0.f && rgb.g > 0.f && rgb.b > 0.f)
+		return rgb;
+
+	vec3 lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.x;
+	float eps = 0.00001f;
+	float C = max(eps, sqrt(lab.y * lab.y + lab.z * lab.z));
+	float a_ = lab.y / C;
+	float b_ = lab.z / C;
+
+	float L0 = 0.5;
+
+	float t = find_gamut_intersection_5(a_, b_, L, C, L0);
+	float L_clipped = L0 * (1.f - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb(vec3( L_clipped, C_clipped * a_, C_clipped * b_ ));
+}
+
+vec3 gamut_clip_project_to_L_cusp(vec3 rgb)
+{
+	if (rgb.r < 1.f && rgb.g < 1.f && rgb.b < 1.f && rgb.r > 0.f && rgb.g > 0.f && rgb.b > 0.f)
+		return rgb;
+
+	vec3 lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.x;
+	float eps = 0.00001f;
+	float C = max(eps, sqrt(lab.y * lab.y + lab.z * lab.z));
+	float a_ = lab.y / C;
+	float b_ = lab.z / C;
+
+	// The cusp is computed here and in find_gamut_intersection, an optimized solution would only compute it once.
+	vec2 cusp = find_cusp(a_, b_);
+
+	float L0 = cusp.x;
+
+	float t = find_gamut_intersection_5(a_, b_, L, C, L0);
+
+	float L_clipped = L0 * (1.f - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb(vec3( L_clipped, C_clipped * a_, C_clipped * b_ ));
+}
+
+vec3 gamut_clip_adaptive_L0_0_5(vec3 rgb, float alpha)
+{
+	if (rgb.r < 1.f && rgb.g < 1.f && rgb.b < 1.f && rgb.r > 0.f && rgb.g > 0.f && rgb.b > 0.f)
+		return rgb;
+
+	vec3 lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.x;
+	float eps = 0.00001f;
+	float C = max(eps, sqrt(lab.y * lab.y + lab.z * lab.z));
+	float a_ = lab.y / C;
+	float b_ = lab.z / C;
+
+	float Ld = L - 0.5f;
+	float e1 = 0.5f + abs(Ld) + alpha * C;
+	float L0 = 0.5f * (1.f + sign(Ld) * (e1 - sqrt(e1 * e1 - 2.f * abs(Ld))));
+
+	float t = find_gamut_intersection_5(a_, b_, L, C, L0);
+	float L_clipped = L0 * (1.f - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb(vec3( L_clipped, C_clipped * a_, C_clipped * b_ ));
+}
+
+vec3 gamut_clip_adaptive_L0_L_cusp(vec3 rgb, float alpha)
+{
+	if (rgb.r < 1.f && rgb.g < 1.f && rgb.b < 1.f && rgb.r > 0.f && rgb.g > 0.f && rgb.b > 0.f)
+		return rgb;
+
+	vec3 lab = linear_srgb_to_oklab(rgb);
+
+	float L = lab.x;
+	float eps = 0.00001f;
+	float C = max(eps, sqrt(lab.y * lab.y + lab.z * lab.z));
+	float a_ = lab.y / C;
+	float b_ = lab.z / C;
+
+	// The cusp is computed here and in find_gamut_intersection, an optimized solution would only compute it once.
+	vec2 cusp = find_cusp(a_, b_);
+
+	float Ld = L - cusp.x;
+	float k = 2.f * (Ld > 0.f ? 1.f - cusp.x : cusp.x);
+
+	float e1 = 0.5f * k + abs(Ld) + alpha * C / k;
+	float L0 = cusp.x + 0.5f * (sign(Ld) * (e1 - sqrt(e1 * e1 - 2.f * k * abs(Ld))));
+
+	float t = find_gamut_intersection_5(a_, b_, L, C, L0);
+	float L_clipped = L0 * (1.f - t) + t * L;
+	float C_clipped = t * C;
+
+	return oklab_to_linear_srgb(vec3( L_clipped, C_clipped * a_, C_clipped * b_ ));
+}
+
+float toe(float x)
+{
+	float k_1 = 0.206f;
+	float k_2 = 0.03f;
+	float k_3 = (1.f + k_1) / (1.f + k_2);
+	return 0.5f * (k_3 * x - k_1 + sqrt((k_3 * x - k_1) * (k_3 * x - k_1) + 4.f * k_2 * k_3 * x));
+}
+
+float toe_inv(float x)
+{
+	float k_1 = 0.206f;
+	float k_2 = 0.03f;
+	float k_3 = (1.f + k_1) / (1.f + k_2);
+	return (x * x + k_1 * x) / (k_3 * (x + k_2));
+}
+)" 
+R"(vec2 to_ST(vec2 cusp)
+{
+	float L = cusp.x;
+	float C = cusp.y;
+	return vec2( C / L, C / (1.f - L) );
+}
+
+// Returns a smooth approximation of the location of the cusp
+// This polynomial was created by an optimization process
+// It has been designed so that S_mid < S_max and T_mid < T_max
+vec2 get_ST_mid(float a_, float b_)
+{
+	float S = 0.11516993f + 1.f / (
+		+7.44778970f + 4.15901240f * b_
+		+ a_ * (-2.19557347f + 1.75198401f * b_
+			+ a_ * (-2.13704948f - 10.02301043f * b_
+				+ a_ * (-4.24894561f + 5.38770819f * b_ + 4.69891013f * a_
+					)))
+		);
+
+	float T = 0.11239642f + 1.f / (
+		+1.61320320f - 0.68124379f * b_
+		+ a_ * (+0.40370612f + 0.90148123f * b_
+			+ a_ * (-0.27087943f + 0.61223990f * b_
+				+ a_ * (+0.00299215f - 0.45399568f * b_ - 0.14661872f * a_
+					)))
+		);
+
+	return vec2( S, T );
+}
+
+vec3 get_Cs(float L, float a_, float b_)
+{
+	vec2 cusp = find_cusp(a_, b_);
+
+	float C_max = find_gamut_intersection(a_, b_, L, 1.f, L, cusp);
+	vec2 ST_max = to_ST(cusp);
+	
+	// Scale factor to compensate for the curved part of gamut shape:
+	float k = C_max / min((L * ST_max.x), (1.f - L) * ST_max.y);
+
+	float C_mid;
+	{
+		vec2 ST_mid = get_ST_mid(a_, b_);
+
+		// Use a soft minimum function, instead of a sharp triangle shape to get a smooth value for chroma.
+		float C_a = L * ST_mid.x;
+		float C_b = (1.f - L) * ST_mid.y;
+		C_mid = 0.9f * k * sqrt(sqrt(1.f / (1.f / (C_a * C_a * C_a * C_a) + 1.f / (C_b * C_b * C_b * C_b))));
+	}
+
+	float C_0;
+	{
+		// for C_0, the shape is independent of hue, so S and T are constant. Values picked to roughly be the average values of S and T.
+		float C_a = L * 0.4f;
+		float C_b = (1.f - L) * 0.8f;
+
+		// Use a soft minimum function, instead of a sharp triangle shape to get a smooth value for chroma.
+		C_0 = sqrt(1.f / (1.f / (C_a * C_a) + 1.f / (C_b * C_b)));
+	}
+
+	return vec3( C_0, C_mid, C_max );
+}
+
+vec3 okhsl_to_srgb(vec3 hsl)
+{
+	float h = hsl.x;
+	float s = hsl.y;
+	float l = hsl.z;
+
+	if (l == 1.0f)
+	{
+		return vec3( 1.f, 1.f, 1.f );
+	}
+
+	else if (l == 0.f)
+	{
+		return vec3( 0.f, 0.f, 0.f );
+	}
+
+	float a_ = cos(2.f * M_PI * h);
+	float b_ = sin(2.f * M_PI * h);
+	float L = toe_inv(l);
+
+	vec3 cs = get_Cs(L, a_, b_);
+	float C_0 = cs.x;
+	float C_mid = cs.y;
+	float C_max = cs.z;
+
+	float mid = 0.8f;
+	float mid_inv = 1.25f;
+
+	float C, t, k_0, k_1, k_2;
+
+	if (s < mid)
+	{
+		t = mid_inv * s;
+
+		k_1 = mid * C_0;
+		k_2 = (1.f - k_1 / C_mid);
+
+		C = t * k_1 / (1.f - k_2 * t);
+	}
+	else
+	{
+		t = (s - mid)/ (1.f - mid);
+
+		k_0 = C_mid;
+		k_1 = (1.f - mid) * C_mid * C_mid * mid_inv * mid_inv / C_0;
+		k_2 = (1.f - (k_1) / (C_max - C_mid));
+
+		C = k_0 + t * k_1 / (1.f - k_2 * t);
+	}
+
+	vec3 rgb = oklab_to_linear_srgb(vec3( L, C * a_, C * b_ ));
+	return vec3(
+		srgb_transfer_function(rgb.r),
+		srgb_transfer_function(rgb.g),
+		srgb_transfer_function(rgb.b)
+	);
+}
+
+vec3 srgb_to_okhsl(vec3 rgb)
+{
+	vec3 lab = linear_srgb_to_oklab(vec3(
+		srgb_transfer_function_inv(rgb.r),
+		srgb_transfer_function_inv(rgb.g),
+		srgb_transfer_function_inv(rgb.b)
+		));
+
+	float C = sqrt(lab.y * lab.y + lab.z * lab.z);
+	float a_ = lab.y / C;
+	float b_ = lab.z / C;
+
+	float L = lab.x;
+	float h = 0.5f + 0.5f * atan(-lab.z, -lab.y) / M_PI;
+
+	vec3 cs = get_Cs(L, a_, b_);
+	float C_0 = cs.x;
+	float C_mid = cs.y;
+	float C_max = cs.z;
+
+	// Inverse of the interpolation in okhsl_to_srgb:
+
+	float mid = 0.8f;
+	float mid_inv = 1.25f;
+
+	float s;
+	if (C < C_mid)
+	{
+		float k_1 = mid * C_0;
+		float k_2 = (1.f - k_1 / C_mid);
+
+		float t = C / (k_1 + k_2 * C);
+		s = t * mid;
+	}
+	else
+	{
+		float k_0 = C_mid;
+		float k_1 = (1.f - mid) * C_mid * C_mid * mid_inv * mid_inv / C_0;
+		float k_2 = (1.f - (k_1) / (C_max - C_mid));
+
+		float t = (C - k_0) / (k_1 + k_2 * (C - k_0));
+		s = mid + (1.f - mid) * t;
+	}
+
+	float l = toe(L);
+	return vec3( h, s, l );
+}
+
+
+vec3 okhsv_to_srgb(vec3 hsv)
+{
+	float h = hsv.x;
+	float s = hsv.y;
+	float v = hsv.z;
+
+	float a_ = cos(2.f * M_PI * h);
+	float b_ = sin(2.f * M_PI * h);
+	
+	vec2 cusp = find_cusp(a_, b_);
+	vec2 ST_max = to_ST(cusp);
+	float S_max = ST_max.x;
+	float T_max = ST_max.y;
+	float S_0 = 0.5f;
+	float k = 1.f- S_0 / S_max;
+
+	// first we compute L and V as if the gamut is a perfect triangle:
+
+	// L, C when v==1:
+	float L_v = 1.f   - s * S_0 / (S_0 + T_max - T_max * k * s);
+	float C_v = s * T_max * S_0 / (S_0 + T_max - T_max * k * s);
+
+	float L = v * L_v;
+	float C = v * C_v;
+
+	// then we compensate for both toe and the curved top part of the triangle:
+	float L_vt = toe_inv(L_v);
+	float C_vt = C_v * L_vt / L_v;
+
+	float L_new = toe_inv(L);
+	C = C * L_new / L;
+	L = L_new;
+
+	vec3 rgb_scale = oklab_to_linear_srgb(vec3( L_vt, a_ * C_vt, b_ * C_vt ));
+	float scale_L = cbrt(1.f / max(max(rgb_scale.r, rgb_scale.g), max(rgb_scale.b, 0.f)));
+
+	L = L * scale_L;
+	C = C * scale_L;
+
+	vec3 rgb = oklab_to_linear_srgb(vec3( L, C * a_, C * b_ ));
+	return vec3(
+		srgb_transfer_function(rgb.r),
+		srgb_transfer_function(rgb.g),
+		srgb_transfer_function(rgb.b)
+	);
+}
+)"
+R"(
+vec3 srgb_to_okhsv(vec3 rgb)
+{
+	vec3 lab = linear_srgb_to_oklab(vec3(
+		srgb_transfer_function_inv(rgb.r),
+		srgb_transfer_function_inv(rgb.g),
+		srgb_transfer_function_inv(rgb.b)
+		));
+
+	float C = sqrt(lab.y * lab.y + lab.z * lab.z);
+	float a_ = lab.y / C;
+	float b_ = lab.z / C;
+
+	float L = lab.x;
+	float h = 0.5f + 0.5f * atan(-lab.z, -lab.y) / M_PI;
+
+	vec2 cusp = find_cusp(a_, b_);
+	vec2 ST_max = to_ST(cusp);
+	float S_max = ST_max.x;
+	float T_max = ST_max.y;
+	float S_0 = 0.5f;
+	float k = 1.f - S_0 / S_max;
+
+	// first we find L_v, C_v, L_vt and C_vt
+
+	float t = T_max / (C + L * T_max);
+	float L_v = t * L;
+	float C_v = t * C;
+
+	float L_vt = toe_inv(L_v);
+	float C_vt = C_v * L_vt / L_v;
+
+	// we can then use these to invert the step that compensates for the toe and the curved top part of the triangle:
+	vec3 rgb_scale = oklab_to_linear_srgb(vec3( L_vt, a_ * C_vt, b_ * C_vt ));
+	float scale_L = cbrt(1.f / max(max(rgb_scale.r, rgb_scale.g), max(rgb_scale.b, 0.f)));
+
+	L = L / scale_L;
+	C = C / scale_L;
+
+	C = C * toe(L) / L;
+	L = toe(L);
+
+	// we can now compute v and s:
+
+	float v = L / L_v;
+	float s = (S_0 + T_max) * C_v / ((T_max * S_0) + T_max * k * C_v);
+
+	return vec3 (h, s, v );
+})";
+
+#endif
diff --git a/thirdparty/vulkan/vk_mem_alloc.h b/thirdparty/vulkan/vk_mem_alloc.h
index d96f2dacc0..184ee005d8 100644
--- a/thirdparty/vulkan/vk_mem_alloc.h
+++ b/thirdparty/vulkan/vk_mem_alloc.h
@@ -25,7 +25,7 @@
 
 /** \mainpage Vulkan Memory Allocator
 
-<b>Version 3.0.1-development (2022-03-28)</b>
+<b>Version 3.0.1 (2022-05-26)</b>
 
 Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. \n
 License: MIT
@@ -300,9 +300,9 @@ extern "C" {
 
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
-// 
+//
 //    INTERFACE
-// 
+//
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -488,7 +488,7 @@ typedef enum VmaMemoryUsage
     When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT),
     you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
     in VmaAllocationCreateInfo::flags.
-    
+
     It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g.
     vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo()
     and not with generic memory allocation functions.
@@ -552,7 +552,7 @@ typedef enum VmaAllocationCreateFlagBits
     */
     VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004,
     /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead.
-    
+
     Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a
     null-terminated string. Instead of copying pointer value, a local copy of the
     string is made and stored in allocation's `pName`. The string is automatically
@@ -579,14 +579,14 @@ typedef enum VmaAllocationCreateFlagBits
     */
     VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100,
     /** \brief Set this flag if the allocated memory will have aliasing resources.
-    
+
     Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified.
     Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors.
     */
     VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200,
     /**
     Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT).
-    
+
     - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value,
       you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect.
     - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`.
@@ -602,7 +602,7 @@ typedef enum VmaAllocationCreateFlagBits
     VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400,
     /**
     Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT).
-    
+
     - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value,
       you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect.
     - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`.
@@ -724,7 +724,7 @@ typedef enum VmaDefragmentationFlagBits
     VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8,
 
     /// A bit mask to extract only `ALGORITHM` bits from entire set of flags.
-    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK = 
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK =
         VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT |
         VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT |
         VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT |
@@ -980,7 +980,7 @@ typedef struct VmaVulkanFunctions
 #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
     /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
     PFN_vkGetBufferMemoryRequirements2KHR VMA_NULLABLE vkGetBufferMemoryRequirements2KHR;
-    /// Fetch "vkGetImageMemoryRequirements 2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
+    /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
     PFN_vkGetImageMemoryRequirements2KHR VMA_NULLABLE vkGetImageMemoryRequirements2KHR;
 #endif
 #if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000
@@ -1117,19 +1117,19 @@ typedef struct VmaStatistics
     */
     uint32_t blockCount;
     /** \brief Number of #VmaAllocation objects allocated.
-    
+
     Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`.
     */
     uint32_t allocationCount;
     /** \brief Number of bytes allocated in `VkDeviceMemory` blocks.
-    
+
     \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object
     (e.g. as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls
     "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such block, usually for a single buffer or image.
     */
     VkDeviceSize blockBytes;
     /** \brief Total number of bytes occupied by all #VmaAllocation objects.
-    
+
     Always less or equal than `blockBytes`.
     Difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan
     but unused by any #VmaAllocation.
@@ -1387,9 +1387,9 @@ typedef struct VmaAllocationInfo
     */
     void* VMA_NULLABLE pUserData;
     /** \brief Custom allocation name that was set with vmaSetAllocationName().
-    
+
     It can change after call to vmaSetAllocationName() for this allocation.
-    
+
     Another way to set custom name is to pass it in VmaAllocationCreateInfo::pUserData with
     additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED].
     */
@@ -1429,7 +1429,7 @@ typedef struct VmaDefragmentationMove
     /// Allocation that should be moved.
     VmaAllocation VMA_NOT_NULL srcAllocation;
     /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`.
-    
+
     \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass,
     to be used for binding new buffer/image to the destination memory using e.g. vmaBindBufferMemory().
     vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory.
@@ -1446,16 +1446,16 @@ typedef struct VmaDefragmentationPassMoveInfo
     /// Number of elements in the `pMoves` array.
     uint32_t moveCount;
     /** \brief Array of moves to be performed by the user in the current defragmentation pass.
-    
+
     Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass().
 
     For each element, you should:
-    
+
     1. Create a new buffer/image in the place pointed by VmaDefragmentationMove::dstMemory + VmaDefragmentationMove::dstOffset.
     2. Copy data from the VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`.
     3. Make sure these commands finished executing on the GPU.
     4. Destroy the old buffer/image.
-    
+
     Only then you can finish defragmentation pass by calling vmaEndDefragmentationPass().
     After this call, the allocation will point to the new place in memory.
 
@@ -1539,7 +1539,7 @@ typedef struct VmaVirtualAllocationCreateInfo
 typedef struct VmaVirtualAllocationInfo
 {
     /** \brief Offset of the allocation.
-     
+
     Offset at which the allocation was made.
     */
     VkDeviceSize offset;
@@ -2364,7 +2364,7 @@ vkDestroyBuffer(device, buffer, allocationCallbacks);
 vmaFreeMemory(allocator, allocation);
 \endcode
 
-It it safe to pass null as buffer and/or allocation.
+It is safe to pass null as buffer and/or allocation.
 */
 VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer(
     VmaAllocator VMA_NOT_NULL allocator,
@@ -2396,7 +2396,7 @@ vkDestroyImage(device, image, allocationCallbacks);
 vmaFreeMemory(allocator, allocation);
 \endcode
 
-It it safe to pass null as image and/or allocation.
+It is safe to pass null as image and/or allocation.
 */
 VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage(
     VmaAllocator VMA_NOT_NULL allocator,
@@ -2555,9 +2555,9 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString(
 
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
-// 
+//
 //    IMPLEMENTATION
-// 
+//
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -2578,6 +2578,9 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString(
 #ifdef _MSC_VER
     #include <intrin.h> // For functions like __popcnt, _BitScanForward etc.
 #endif
+#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20
+    #include <bit> // For std::popcount
+#endif
 
 /*******************************************************************************
 CONFIGURATION SECTION
@@ -3180,12 +3183,16 @@ But you need to check in runtime whether user's CPU supports these, as some old
 */
 static inline uint32_t VmaCountBitsSet(uint32_t v)
 {
+#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20
+    return std::popcount(v);
+#else
     uint32_t c = v - ((v >> 1) & 0x55555555);
     c = ((c >> 2) & 0x33333333) + (c & 0x33333333);
     c = ((c >> 4) + c) & 0x0F0F0F0F;
     c = ((c >> 8) + c) & 0x00FF00FF;
     c = ((c >> 16) + c) & 0x0000FFFF;
     return c;
+#endif
 }
 
 static inline uint8_t VmaBitScanLSB(uint64_t mask)
@@ -3374,60 +3381,6 @@ static inline bool VmaStrIsEmpty(const char* pStr)
     return pStr == VMA_NULL || *pStr == '\0';
 }
 
-#if VMA_STATS_STRING_ENABLED
-static const char* VmaAlgorithmToStr(uint32_t algorithm)
-{
-    switch (algorithm)
-    {
-    case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT:
-        return "Linear";
-    case 0:
-        return "TLSF";
-    default:
-        VMA_ASSERT(0);
-        return "";
-    }
-}
-#endif // VMA_STATS_STRING_ENABLED
-
-#ifndef VMA_SORT
-template<typename Iterator, typename Compare>
-Iterator VmaQuickSortPartition(Iterator beg, Iterator end, Compare cmp)
-{
-    Iterator centerValue = end; --centerValue;
-    Iterator insertIndex = beg;
-    for (Iterator memTypeIndex = beg; memTypeIndex < centerValue; ++memTypeIndex)
-    {
-        if (cmp(*memTypeIndex, *centerValue))
-        {
-            if (insertIndex != memTypeIndex)
-            {
-                VMA_SWAP(*memTypeIndex, *insertIndex);
-            }
-            ++insertIndex;
-        }
-    }
-    if (insertIndex != centerValue)
-    {
-        VMA_SWAP(*insertIndex, *centerValue);
-    }
-    return insertIndex;
-}
-
-template<typename Iterator, typename Compare>
-void VmaQuickSort(Iterator beg, Iterator end, Compare cmp)
-{
-    if (beg < end)
-    {
-        Iterator it = VmaQuickSortPartition<Iterator, Compare>(beg, end, cmp);
-        VmaQuickSort<Iterator, Compare>(beg, it, cmp);
-        VmaQuickSort<Iterator, Compare>(it + 1, end, cmp);
-    }
-}
-
-#define VMA_SORT(beg, end, cmp) VmaQuickSort(beg, end, cmp)
-#endif // VMA_SORT
-
 /*
 Returns true if two memory blocks occupy overlapping pages.
 ResourceA must be in less memory offset than ResourceB.
@@ -5073,7 +5026,7 @@ public:
     VmaIntrusiveLinkedList& operator=(VmaIntrusiveLinkedList&& src);
     VmaIntrusiveLinkedList& operator=(const VmaIntrusiveLinkedList&) = delete;
     ~VmaIntrusiveLinkedList() { VMA_HEAVY_ASSERT(IsEmpty()); }
-    
+
     size_t GetCount() const { return m_Count; }
     bool IsEmpty() const { return m_Count == 0; }
     ItemType* Front() { return m_Front; }
@@ -5485,7 +5438,7 @@ public:
     // Writes a string value inside "".
     // pStr can contain any ANSI characters, including '"', new line etc. - they will be properly escaped.
     void WriteString(const char* pStr);
-    
+
     // Begins writing a string value.
     // Call BeginString, ContinueString, ContinueString, ..., EndString instead of
     // WriteString to conveniently build the string content incrementally, made of
@@ -6463,7 +6416,7 @@ void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size
             (uint32_t)allocation->GetSuballocationType());
 #endif // VMA_STATS_STRING_ENABLED
     }
-    
+
 }
 
 #if VMA_STATS_STRING_ENABLED
@@ -10941,7 +10894,7 @@ public:
     uint32_t GetAlgorithm() const { return m_Algorithm; }
     bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; }
     float GetPriority() const { return m_Priority; }
-    void* const GetAllocationNextPtr() const { return m_pMemoryAllocateNext; }
+    const void* GetAllocationNextPtr() const { return m_pMemoryAllocateNext; }
     // To be used only while the m_Mutex is locked. Used during defragmentation.
     size_t GetBlockCount() const { return m_Blocks.size(); }
     // To be used only while the m_Mutex is locked. Used during defragmentation.
@@ -12783,7 +12736,7 @@ void VmaBlockVector::IncrementallySortBlocks()
 void VmaBlockVector::SortByFreeSize()
 {
     VMA_SORT(m_Blocks.begin(), m_Blocks.end(),
-        [](auto* b1, auto* b2)
+        [](VmaDeviceMemoryBlock* b1, VmaDeviceMemoryBlock* b2) -> bool
         {
             return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize();
         });
@@ -13029,7 +12982,7 @@ VmaDefragmentationContext_T::VmaDefragmentationContext_T(
             }
         }
     }
-    
+
     switch (m_Algorithm)
     {
     case 0: // Default algorithm
@@ -13155,7 +13108,7 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo
             vector = m_pBlockVectors[vectorIndex];
             VMA_ASSERT(vector != VMA_NULL);
         }
-        
+
         switch (move.operation)
         {
         case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY:
@@ -13452,7 +13405,7 @@ bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, Vma
         case CounterStatus::Pass:
             break;
         }
-        
+
         VkDeviceSize offset = moveData.move.srcAllocation->GetOffset();
         if (offset != 0 && metadata->GetSumFreeSize() >= moveData.size)
         {
@@ -13636,7 +13589,7 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector
             prevFreeRegionSize = nextFreeRegionSize;
         }
     }
-    
+
     // No moves perfomed, update statistics to current vector state
     if (startMoveCount == m_Moves.size() && !update)
     {
@@ -13923,7 +13876,7 @@ void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector,
     state.avgFreeSize /= freeCount;
 }
 
-bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, 
+bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType,
     VmaBlockVector& vector, size_t firstFreeBlock,
     bool& texturePresent, bool& bufferPresent, bool& otherPresent)
 {
@@ -15919,6 +15872,7 @@ void VmaAllocator_T::UpdateVulkanBudget()
 void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern)
 {
     if(VMA_DEBUG_INITIALIZE_ALLOCATIONS &&
+        hAllocation->IsMappingAllowed() &&
         (m_MemProps.memoryTypes[hAllocation->GetMemoryTypeIndex()].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0)
     {
         void* pData = VMA_NULL;
@@ -16010,8 +15964,8 @@ void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json)
                             json.ContinueString_Size(index++);
                             if (pool->GetName())
                             {
-                                json.WriteString(" - ");
-                                json.WriteString(pool->GetName());
+                                json.ContinueString(" - ");
+                                json.ContinueString(pool->GetName());
                             }
                             json.EndString();
 
@@ -18427,7 +18381,7 @@ for(;;)
         VmaAllocationInfo allocInfo;
         vmaGetAllocationInfo(allocator, pMoves[i].srcAllocation, &allocInfo);
         MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData;
-            
+
         // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset.
         VkImageCreateInfo imgCreateInfo = ...
         VkImage newImg;
@@ -18439,7 +18393,7 @@ for(;;)
         // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place.
         vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...);
     }
-        
+
     // Make sure the copy commands finished executing.
     vkWaitForFences(...);
 
@@ -18451,7 +18405,7 @@ for(;;)
     }
 
     // Update appropriate descriptors to point to the new places...
-        
+
     res = vmaEndDefragmentationPass(allocator, defragCtx, &pass);
     if(res == VK_SUCCESS)
         break;
@@ -18605,7 +18559,7 @@ To do that, fill VmaAllocationCreateInfo::pUserData field when creating
 an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer,
 some handle, index, key, ordinal number or any other value that would associate
 the allocation with your custom metadata.
-It it useful to identify appropriate data structures in your engine given #VmaAllocation,
+It is useful to identify appropriate data structures in your engine given #VmaAllocation,
 e.g. when doing \ref defragmentation.
 
 \code
@@ -18836,14 +18790,14 @@ To do it, define macro `VMA_DEBUG_INITIALIZE_ALLOCATIONS` to 1.
 #include "vk_mem_alloc.h"
 \endcode
 
-It makes memory of all new allocations initialized to bit pattern `0xDCDCDCDC`.
+It initializes the memory of new allocations to the bit pattern `0xDCDCDCDC`.
 Before an allocation is destroyed, its memory is filled with bit pattern `0xEFEFEFEF`.
 Memory is automatically mapped and unmapped if necessary.
 
 If you find these values while debugging your program, good chances are that you incorrectly
 read Vulkan memory that is allocated but not initialized, or already freed, respectively.
 
-Memory initialization works only with memory types that are `HOST_VISIBLE`.
+Memory initialization works only with memory types that are `HOST_VISIBLE` and with allocations that can be mapped.
 It works also with dedicated allocations.
 
 \section debugging_memory_usage_margins Margins
@@ -19116,13 +19070,13 @@ so you need to create another "staging" allocation and perform explicit transfer
 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
 bufCreateInfo.size = 65536;
 bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
- 
+
 VmaAllocationCreateInfo allocCreateInfo = {};
 allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
     VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
     VMA_ALLOCATION_CREATE_MAPPED_BIT;
- 
+
 VkBuffer buf;
 VmaAllocation alloc;
 VmaAllocationInfo allocInfo;