309 files changed, 10464 insertions, 8558 deletions
diff --git a/.clang-format b/.clang-format
index bc69a6a3a6..212bc25109 100644
--- a/.clang-format
+++ b/.clang-format
@@ -44,7 +44,6 @@ AllowShortIfStatementsOnASingleLine: true
 BreakBeforeTernaryOperators: false
 # BreakConstructorInitializersBeforeComma: false
 BreakConstructorInitializers: AfterColon
-# BreakAfterJavaFieldAnnotations: false
 # BreakStringLiterals: true
 ColumnLimit:     0
 # CommentPragmas:  '^ IWYU pragma:'
@@ -113,4 +112,8 @@ Language:        ObjC
 ObjCBlockIndentWidth: 4
 # ObjCSpaceAfterProperty: false
 # ObjCSpaceBeforeProtocolList: true
+---
+### Java specific config ###
+Language:        Java
+# BreakAfterJavaFieldAnnotations: false
 ...
diff --git a/AUTHORS.md b/AUTHORS.md
index 73712bbfc4..9a373896f8 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -32,6 +32,7 @@ name is available.
     Alket Rexhepi (alketii)
     Andrea Catania (AndreaCatania)
     Andreas Haas (Hinsbart)
+    Andy Moss (MillionOstrich)
     Anton Yabchinskiy (a12n)
     Aren Villanueva (kurikaesu)
     Ariel Manzur (punto-)
@@ -43,6 +44,7 @@ name is available.
     Carl Olsson (not-surt)
     Dana Olson (adolson)
     Daniel J. Ramirez (djrm)
+    Dmitry Koteroff (Krakean)
     Дмитрий Сальников (DmitriySalnikov)
     Emmanuel Leblond (touilleMan)
     Fabio Alessandrelli (Faless)
@@ -53,7 +55,7 @@ name is available.
     George Marques (vnen)
     Gerrit Großkopf (Grosskopf)
     Gilles Roudiere (groud)
-    Guilherme Felipe (guilhermefelipecgs)
+    Guilherme Felipe de C. G. da Silva (guilhermefelipecgs)
     Hein-Pieter van Braam (hpvb)
     Hiroshi Ogawa (hi-ogawa)
     Hubert Jarosz (Marqin)
@@ -76,6 +78,7 @@ name is available.
     Masoud BH (masoudbh3)
     Matthias Hölzl (hoelzl)
     Max Hilbrunner (mhilbrunner)
+    Michael Alexsander Silva Dias (YeldhamDev)
     Nathan Warden (NathanWarden)
     Nuno Donato (nunodonato)
     Ovnuniarchos
@@ -97,13 +100,18 @@ name is available.
     Timo (toger5)
     V. Vamsi Krishna (vkbsb)
     Vinzenz Feenstra (vinzenz)
+    Will Nations (willnationsdev)
     Wilson E. Alvarez (Rubonnek)
+    Yuri Roubinski (Chaosus)
     Zher Huei Lee (leezh)
     ZuBsPaCe
     박한얼 (volzhs)
     bruvzg
     est31
+    m4nu3lf
     marynate
     mrezai
+    rraallvv
     romulox-x
+    sersoong
     yg2f (SuperUserNameMan)
diff --git a/DONORS.md b/DONORS.md
index 3891a708ce..aea2ef5ae8 100644
--- a/DONORS.md
+++ b/DONORS.md
@@ -12,7 +12,7 @@ generous deed immortalized in the next stable release of Godot Engine.
 
 ## Platinum sponsors
 
-None so far, but your company could be the first! :)
+    Enjin Coin <https://enjincoin.io>
 
 ## Gold sponsors
 
@@ -22,15 +22,16 @@ None so far, but your company could be the first! :)
 ## Mini sponsors
 
     Andreas
-    Andreas Hirschauer
+    Brandon Lamb
     Christian Uldall Pedersen
     Christoph Woinke
     E Hewert
     Hein-Pieter van Braam
+    Jamal Alyafei
+    Jordan M Lucas
     Matthieu Huvé
     Nathan Warden
     Neal Gompa (Conan Kudo)
-    Olimpiu Metiu
     Pascal Julien
     Ruslan Mustakov
     Slobodan Milnovic
@@ -48,15 +49,19 @@ None so far, but your company could be the first! :)
     Officine Pixel S.n.c.
     Rémi Verschelde
     Stephan Lanfermann
+    Zaven Muradyan
 
     Andreas Schüle
     Austen McRae
     Bernhard Liebl
+    Cody Brocious
     Gerald E Butler
     Jahn Johansen
-    Jordan M Lucas
+    Johannes Wuensch
     Kris Michael
+    Libre-Dépanne
     Ranoller
+    Svenne Krap
 
     BanjoNode2D
     Chris Serino
@@ -80,40 +85,49 @@ None so far, but your company could be the first! :)
 
     Amanda Haldy
     Andreas Haas
-    Andres Cuevas
     Arnaud Verstuyf
-    Bryanna M
     Chris Brown
     Cody Parker
     D
+    Daniel Eliasinski
+    Daniel Langegger
+    Eric Monson
     Ezra Theunissen
     flesk
     François Cantin
     Giovanni Solimeno
-    Hendrik Mans
     Jeppe Zapp
     Justin Arnold
     Justo Delgado Baudí
     Leandro Voltolino
     Lucien Boudy
     Noah
+    Patrick Schnorbus
+    Pete Goodwin
     Ryan Estes
+    Ted
+    Travis Womack
     Trent McPheron
 
 ## Silver donors
 
     1D_Inc
+    Abe Pazos
+    Alder Stefano
     Alessandro Senese
     Alex Barsukov
+    Andres Cuevas
     Anthony Bongiovanni
     Avencherus
     Bastian Böhm
     Ben Vercammen
     Blair Allen
     Bryan Stevenson
+    Casey Foote
     Christian Baune
     Christian Winter
     Collin Shooltz
+    Daniel Egger
     Daniel Kaplan
     David Cravens
     David May
@@ -122,11 +136,13 @@ None so far, but your company could be the first! :)
     Eric Martini
     Fabian Becker
     fengjiongmax
+    Francesco Lisi
     Frank C. Simmons
+    Fredy Romero Sam
+    G3Dev sàrl
     Geequlim
     Gerrit Großkopf
     Guldoman
-    Gustav Dahlström
     HardRound
     hatniX
     HeartBeast
@@ -134,6 +150,7 @@ None so far, but your company could be the first! :)
     Hunter Jones
     Jaime Ruiz-Borau Vizárraga
     Jeff Hungerford
+    Jerry Chen
     Jesse Liles
     joe513
     Jonathon
@@ -146,15 +163,15 @@ None so far, but your company could be the first! :)
     Kevin Boyer
     Kevin Kamper Meejach Petersen
     Klavdij Voncina
-    Kobi Malul
     Linus Lind Lundgren
     Lisandro Lorea
     magodev
+    Markus Wiesner
     Martin Novák
     Matthew Fitzpatrick
-    Matthew Valancy
     Matthias Hölzl
     Max R.R. Collada
+    memoryruins
     mhilbrunner
     Michael Gringauz
     Michael Tintiuc
@@ -172,17 +189,18 @@ None so far, but your company could be the first! :)
     Patrick Nafarrete
     Paul Mason
     Paweł Kowal
-    Pete Goodwin
+    Pierre-Igor Berthet
     Pietro Vertechi
     rayos
     Richman Stewart
+    Rodolfo Baeza
+    Roger Burgess
     Roger Smith
     Roman Tinkov
     Sam Van Campenhout
     Sasori Olkof
     Scott D. Yelich
     Sootstone
-    TheHappieCat
     Theo Cranmore
     Thomas Norman
     Tom Larrow
diff --git a/core/class_db.cpp b/core/class_db.cpp
index 57e88044b5..edd49fe95f 100644
--- a/core/class_db.cpp
+++ b/core/class_db.cpp
@@ -1036,7 +1036,6 @@ bool ClassDB::get_property(Object *p_object, const StringName &p_property, Varia
 			r_value = *c;
 			return true;
 		}
-		//if (check->constant_map.fin)
 
 		check = check->inherits_ptr;
 	}
@@ -1163,24 +1162,6 @@ bool ClassDB::has_method(StringName p_class, StringName p_method, bool p_no_inhe
 	return false;
 }
 
-bool ClassDB::get_setter_and_type_for_property(const StringName &p_class, const StringName &p_prop, StringName &r_class, StringName &r_setter) {
-
-	ClassInfo *type = classes.getptr(p_class);
-	ClassInfo *check = type;
-	while (check) {
-
-		if (check->property_setget.has(p_prop)) {
-			r_class = check->name;
-			r_setter = check->property_setget[p_prop].setter;
-			return true;
-		}
-
-		check = check->inherits_ptr;
-	}
-
-	return false;
-}
-
 #ifdef DEBUG_METHODS_ENABLED
 MethodBind *ClassDB::bind_methodfi(uint32_t p_flags, MethodBind *p_bind, const MethodDefinition &method_name, const Variant **p_defs, int p_defcount) {
 	StringName mdname = method_name.name;
diff --git a/core/class_db.h b/core/class_db.h
index b8b681301d..55fe01ec6d 100644
--- a/core/class_db.h
+++ b/core/class_db.h
@@ -349,8 +349,6 @@ public:
 
 	static StringName get_category(const StringName &p_node);
 
-	static bool get_setter_and_type_for_property(const StringName &p_class, const StringName &p_prop, StringName &r_class, StringName &r_setter);
-
 	static void set_class_enabled(StringName p_class, bool p_enable);
 	static bool is_class_enabled(StringName p_class);
 
diff --git a/core/io/json.cpp b/core/io/json.cpp
index ddfc792cc7..82e938d2db 100644
--- a/core/io/json.cpp
+++ b/core/io/json.cpp
@@ -43,7 +43,7 @@ const char *JSON::tk_name[TK_MAX] = {
 	"EOF",
 };
 
-static String _make_indent(const String& p_indent, int p_size) {
+static String _make_indent(const String &p_indent, int p_size) {
 
 	String indent_text = "";
 	if (!p_indent.empty()) {
@@ -53,7 +53,7 @@ static String _make_indent(const String& p_indent, int p_size) {
 	return indent_text;
 }
 
-String JSON::_print_var(const Variant &p_var, const String& p_indent, int p_cur_indent, bool p_sort_keys) {
+String JSON::_print_var(const Variant &p_var, const String &p_indent, int p_cur_indent, bool p_sort_keys) {
 
 	String colon = ":";
 	String end_statement = "";
@@ -116,7 +116,7 @@ String JSON::_print_var(const Variant &p_var, const String& p_indent, int p_cur_
 	}
 }
 
-String JSON::print(const Variant &p_var, const String& p_indent, bool p_sort_keys) {
+String JSON::print(const Variant &p_var, const String &p_indent, bool p_sort_keys) {
 
 	return _print_var(p_var, p_indent, 0, p_sort_keys);
 }
diff --git a/core/io/json.h b/core/io/json.h
index 5e1a89f069..fbb7875c7c 100644
--- a/core/io/json.h
+++ b/core/io/json.h
@@ -64,7 +64,7 @@ class JSON {
 
 	static const char *tk_name[TK_MAX];
 
-	static String _print_var(const Variant &p_var, const String& p_indent, int p_cur_indent, bool p_sort_keys);
+	static String _print_var(const Variant &p_var, const String &p_indent, int p_cur_indent, bool p_sort_keys);
 
 	static Error _get_token(const CharType *p_str, int &index, int p_len, Token &r_token, int &line, String &r_err_str);
 	static Error _parse_value(Variant &value, Token &token, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str);
@@ -72,7 +72,7 @@ class JSON {
 	static Error _parse_object(Dictionary &object, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str);
 
 public:
-	static String print(const Variant &p_var, const String& p_indent = "", bool p_sort_keys = true);
+	static String print(const Variant &p_var, const String &p_indent = "", bool p_sort_keys = true);
 	static Error parse(const String &p_json, Variant &r_ret, String &r_err_str, int &r_err_line);
 };
 
diff --git a/core/math/a_star.cpp b/core/math/a_star.cpp
index 7e26761abf..4498efeb41 100644
--- a/core/math/a_star.cpp
+++ b/core/math/a_star.cpp
@@ -473,8 +473,8 @@ void AStar::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_point_path", "from_id", "to_id"), &AStar::get_point_path);
 	ClassDB::bind_method(D_METHOD("get_id_path", "from_id", "to_id"), &AStar::get_id_path);
 
-	BIND_VMETHOD(MethodInfo("_estimate_cost", PropertyInfo(Variant::INT, "from_id"), PropertyInfo(Variant::INT, "to_id")));
-	BIND_VMETHOD(MethodInfo("_compute_cost", PropertyInfo(Variant::INT, "from_id"), PropertyInfo(Variant::INT, "to_id")));
+	BIND_VMETHOD(MethodInfo(Variant::REAL, "_estimate_cost", PropertyInfo(Variant::INT, "from_id"), PropertyInfo(Variant::INT, "to_id")));
+	BIND_VMETHOD(MethodInfo(Variant::REAL, "_compute_cost", PropertyInfo(Variant::INT, "from_id"), PropertyInfo(Variant::INT, "to_id")));
 }
 
 AStar::AStar() {
diff --git a/doc/classes/AStar.xml b/doc/classes/AStar.xml
index eefdf4d7a7..e0e3261edf 100644
--- a/doc/classes/AStar.xml
+++ b/doc/classes/AStar.xml
@@ -13,7 +13,7 @@
 	</demos>
 	<methods>
 		<method name="_compute_cost" qualifiers="virtual">
-			<return type="void">
+			<return type="float">
 			</return>
 			<argument index="0" name="from_id" type="int">
 			</argument>
@@ -24,7 +24,7 @@
 			</description>
 		</method>
 		<method name="_estimate_cost" qualifiers="virtual">
-			<return type="void">
+			<return type="float">
 			</return>
 			<argument index="0" name="from_id" type="int">
 			</argument>
diff --git a/doc/classes/Animation.xml b/doc/classes/Animation.xml
index c3933443a0..93b01a466b 100644
--- a/doc/classes/Animation.xml
+++ b/doc/classes/Animation.xml
@@ -30,6 +30,16 @@
 				Clear the animation (clear all tracks and reset all).
 			</description>
 		</method>
+		<method name="copy_track">
+			<return type="void">
+			</return>
+			<argument index="0" name="track" type="int">
+			</argument>
+			<argument index="1" name="to_animation" type="Animation">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="find_track" qualifiers="const">
 			<return type="int">
 			</return>
@@ -244,6 +254,14 @@
 				Insert a generic key in a given track.
 			</description>
 		</method>
+		<method name="track_is_enabled" qualifiers="const">
+			<return type="bool">
+			</return>
+			<argument index="0" name="idx" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="track_is_imported" qualifiers="const">
 			<return type="bool">
 			</return>
@@ -293,6 +311,16 @@
 				Remove a key by position (seconds) in a given track.
 			</description>
 		</method>
+		<method name="track_set_enabled">
+			<return type="void">
+			</return>
+			<argument index="0" name="idx" type="int">
+			</argument>
+			<argument index="1" name="enabled" type="bool">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="track_set_imported">
 			<return type="void">
 			</return>
diff --git a/doc/classes/AnimationPlayer.xml b/doc/classes/AnimationPlayer.xml
index 6ae0debc3a..d61211bb6b 100644
--- a/doc/classes/AnimationPlayer.xml
+++ b/doc/classes/AnimationPlayer.xml
@@ -74,20 +74,6 @@
 				Returns the name of [code]animation[/code] or empty string if not found.
 			</description>
 		</method>
-		<method name="get_anim_length" qualifiers="const">
-			<return type="float">
-			</return>
-			<description>
-				Get the length (in seconds) of the currently playing animation.
-			</description>
-		</method>
-		<method name="get_anim_position" qualifiers="const">
-			<return type="float">
-			</return>
-			<description>
-				Get the position (in seconds) of the currently playing animation.
-			</description>
-		</method>
 		<method name="get_animation" qualifiers="const">
 			<return type="Animation">
 			</return>
@@ -115,11 +101,18 @@
 				Get the blend time (in seconds) between two animations, referenced by their names.
 			</description>
 		</method>
-		<method name="get_current_animation" qualifiers="const">
-			<return type="String">
+		<method name="get_current_animation_length" qualifiers="const">
+			<return type="float">
 			</return>
 			<description>
-				Returns the name of the currently playing animation.
+				Get the length (in seconds) of the currently playing animation.
+			</description>
+		</method>
+		<method name="get_current_animation_position" qualifiers="const">
+			<return type="float">
+			</return>
+			<description>
+				Get the position (in seconds) of the currently playing animation.
 			</description>
 		</method>
 		<method name="has_animation" qualifiers="const">
@@ -217,15 +210,6 @@
 				Specify a blend time (in seconds) between two animations, referenced by their names.
 			</description>
 		</method>
-		<method name="set_current_animation">
-			<return type="void">
-			</return>
-			<argument index="0" name="anim" type="String">
-			</argument>
-			<description>
-				Set the current animation (even if no playback occurs). Using set_current_animation() and set_active() are similar to calling play().
-			</description>
-		</method>
 		<method name="stop">
 			<return type="void">
 			</return>
@@ -253,6 +237,8 @@
 		<member name="current_animation" type="String" setter="set_current_animation" getter="get_current_animation">
 			The name of the current animation. Default value: [code]""[/code].
 		</member>
+		<member name="playback_default_blend_time" type="float" setter="set_default_blend_time" getter="get_default_blend_time">
+		</member>
 		<member name="playback_process_mode" type="int" setter="set_animation_process_mode" getter="get_animation_process_mode" enum="AnimationPlayer.AnimationProcessMode">
 			The process notification in which to update animations. Default value: [enum ANIMATION_PROCESS_IDLE].
 		</member>
diff --git a/doc/classes/AnimationTreePlayer.xml b/doc/classes/AnimationTreePlayer.xml
index 37005c3bd3..ecd1f81951 100644
--- a/doc/classes/AnimationTreePlayer.xml
+++ b/doc/classes/AnimationTreePlayer.xml
@@ -194,19 +194,6 @@
 				Disconnects nodes connected to [code]id[/code] at the specified input slot.
 			</description>
 		</method>
-		<method name="get_base_path" qualifiers="const">
-			<return type="NodePath">
-			</return>
-			<description>
-			</description>
-		</method>
-		<method name="get_master_player" qualifiers="const">
-			<return type="NodePath">
-			</return>
-			<description>
-				Returns the path to the [AnimationPlayer] from which this [code]AnimationTreePlayer[/code] binds animations to animation nodes.
-			</description>
-		</method>
 		<method name="get_node_list">
 			<return type="PoolStringArray">
 			</return>
@@ -214,13 +201,6 @@
 				Returns a PoolStringArray containing the name of all nodes.
 			</description>
 		</method>
-		<method name="is_active" qualifiers="const">
-			<return type="bool">
-			</return>
-			<description>
-				Returns whether this AnimationTreePlayer is active.
-			</description>
-		</method>
 		<method name="mix_node_get_amount" qualifiers="const">
 			<return type="float">
 			</return>
@@ -473,32 +453,6 @@
 				Resets this AnimationTreePlayer.
 			</description>
 		</method>
-		<method name="set_active">
-			<return type="void">
-			</return>
-			<argument index="0" name="enabled" type="bool">
-			</argument>
-			<description>
-				Sets whether this AnimationTreePlayer is active. AnimationTreePlayer will start processing if set to active.
-			</description>
-		</method>
-		<method name="set_base_path">
-			<return type="void">
-			</return>
-			<argument index="0" name="path" type="NodePath">
-			</argument>
-			<description>
-				Sets base path of this AnimationTreePlayer.
-			</description>
-		</method>
-		<method name="set_master_player">
-			<return type="void">
-			</return>
-			<argument index="0" name="nodepath" type="NodePath">
-			</argument>
-			<description>
-			</description>
-		</method>
 		<method name="timescale_node_get_scale" qualifiers="const">
 			<return type="float">
 			</return>
@@ -627,17 +581,17 @@
 		</method>
 	</methods>
 	<members>
-		<member name="playback_process_mode" type="int" setter="set_animation_process_mode" getter="get_animation_process_mode" enum="AnimationTreePlayer.AnimationProcessMode">
-			The thread in which to update animations. Default value: [enum ANIMATION_PROCESS_IDLE].
-		</member>
-		<member name="master_player" type="NodePath" setter="set_master_player" getter="get_master_player">
-			The path to the [AnimationPlayer] from which this [code]AnimationTreePlayer[/code] binds animations to animation nodes.
+		<member name="active" type="bool" setter="set_active" getter="is_active">
+			If [code]true[/code] the [code]AnimationTreePlayer[/code] is able to play animations. Default value: [code]false[/code].
 		</member>
 		<member name="base_path" type="NodePath" setter="set_base_path" getter="get_base_path">
 			The node from which to relatively access other nodes. Default value: [code]".."[/code].
 		</member>
-		<member name="active" type="bool" setter="set_active" getter="is_active">
-			If [code]true[/code] the [code]AnimationTreePlayer[/code] is able to play animations. Default value: [code]false[/code].
+		<member name="master_player" type="NodePath" setter="set_master_player" getter="get_master_player">
+			The path to the [AnimationPlayer] from which this [code]AnimationTreePlayer[/code] binds animations to animation nodes.
+		</member>
+		<member name="playback_process_mode" type="int" setter="set_animation_process_mode" getter="get_animation_process_mode" enum="AnimationTreePlayer.AnimationProcessMode">
+			The process notification in which to update animations. Default value: [enum ANIMATION_PROCESS_IDLE].
 		</member>
 	</members>
 	<constants>
diff --git a/doc/classes/CanvasItem.xml b/doc/classes/CanvasItem.xml
index 4a567981e6..cf0b482b07 100644
--- a/doc/classes/CanvasItem.xml
+++ b/doc/classes/CanvasItem.xml
@@ -88,6 +88,34 @@
 				Draw a line from a 2D point to another, with a given color and width. It can be optionally antialiased.
 			</description>
 		</method>
+		<method name="draw_multiline">
+			<return type="void">
+			</return>
+			<argument index="0" name="points" type="PoolVector2Array">
+			</argument>
+			<argument index="1" name="color" type="Color">
+			</argument>
+			<argument index="2" name="width" type="float" default="1.0">
+			</argument>
+			<argument index="3" name="antialiased" type="bool" default="false">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="draw_multiline_colors">
+			<return type="void">
+			</return>
+			<argument index="0" name="points" type="PoolVector2Array">
+			</argument>
+			<argument index="1" name="colors" type="PoolColorArray">
+			</argument>
+			<argument index="2" name="width" type="float" default="1.0">
+			</argument>
+			<argument index="3" name="antialiased" type="bool" default="false">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="draw_polygon">
 			<return type="void">
 			</return>
diff --git a/doc/classes/ConcavePolygonShape2D.xml b/doc/classes/ConcavePolygonShape2D.xml
index 1910b1d62d..1d2aabd6ea 100644
--- a/doc/classes/ConcavePolygonShape2D.xml
+++ b/doc/classes/ConcavePolygonShape2D.xml
@@ -15,6 +15,7 @@
 	</methods>
 	<members>
 		<member name="segments" type="PoolVector2Array" setter="set_segments" getter="get_segments">
+			The array of points that make up the [code]ConcavePolygonShape2D[/code]'s line segments.
 		</member>
 	</members>
 	<constants>
diff --git a/doc/classes/CubeMap.xml b/doc/classes/CubeMap.xml
index a7857dba78..30022efd38 100644
--- a/doc/classes/CubeMap.xml
+++ b/doc/classes/CubeMap.xml
@@ -4,7 +4,7 @@
 		A CubeMap is a 6 sided 3D texture.
 	</brief_description>
 	<description>
-		A CubeMap is a 6 sided 3D texture typically used for faking reflections. It can be used to make an object look as if it's reflecting its surroundings. This usually delivers much better performance than other reflection methods.
+		A 6-sided 3D texture typically used for faking reflections. It can be used to make an object look as if it's reflecting its surroundings. This usually delivers much better performance than other reflection methods.
 	</description>
 	<tutorials>
 	</tutorials>
@@ -15,13 +15,14 @@
 			<return type="int">
 			</return>
 			<description>
+				Returns the render flags for the [code]CubeMap[/code]. See the [code]FLAG_*[/code] constants for details.
 			</description>
 		</method>
 		<method name="get_height" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
-				Returns the CubeMap's height.
+				Returns the [code]CubeMap[/code]'s height.
 			</description>
 		</method>
 		<method name="get_side" qualifiers="const">
@@ -30,14 +31,14 @@
 			<argument index="0" name="side" type="int" enum="CubeMap.Side">
 			</argument>
 			<description>
-				Returns an [Image] for a side of the CubeMap using one of the [code]SIDE_*[/code] constants or an integer 0-5.
+				Returns an [Image] for a side of the [code]CubeMap[/code] using one of the [code]SIDE_*[/code] constants or an integer 0-5.
 			</description>
 		</method>
 		<method name="get_width" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
-				Returns the CubeMap's width.
+				Returns the [code]CubeMap[/code]'s width.
 			</description>
 		</method>
 		<method name="set_flags">
@@ -46,6 +47,7 @@
 			<argument index="0" name="flags" type="int">
 			</argument>
 			<description>
+				Sets the render flags for the [code]CubeMap[/code]. See the [code]FLAG_*[/code] constants for details.
 			</description>
 		</method>
 		<method name="set_side">
@@ -56,44 +58,57 @@
 			<argument index="1" name="image" type="Image">
 			</argument>
 			<description>
-				Sets an [Image] for a side of the CubeMap using one of the [code]SIDE_*[/code] constants or an integer 0-5.
+				Sets an [Image] for a side of the [code]CubeMap[/code] using one of the [code]SIDE_*[/code] constants or an integer 0-5.
 			</description>
 		</method>
 	</methods>
 	<members>
 		<member name="lossy_storage_quality" type="float" setter="set_lossy_storage_quality" getter="get_lossy_storage_quality">
-			The lossy storage quality of the CubeMap if the storage mode is set to STORAGE_COMPRESS_LOSSY.
+			The lossy storage quality of the [code]CubeMap[/code] if the storage mode is set to [code]STORAGE_COMPRESS_LOSSY[/code].
 		</member>
 		<member name="storage_mode" type="int" setter="set_storage" getter="get_storage" enum="CubeMap.Storage">
-			The CubeMap's storage mode. See [code]STORAGE_*[/code] constants.
+			The [code]CubeMap[/code]'s storage mode. See [code]STORAGE_*[/code] constants.
 		</member>
 	</members>
 	<constants>
 		<constant name="STORAGE_RAW" value="0" enum="Storage">
+			Store the [code]CubeMap[/code] without any compression.
 		</constant>
 		<constant name="STORAGE_COMPRESS_LOSSY" value="1" enum="Storage">
+			Store the [code]CubeMap[/code] with strong compression that reduces image quality.
 		</constant>
 		<constant name="STORAGE_COMPRESS_LOSSLESS" value="2" enum="Storage">
+			Store the [code]CubeMap[/code] with moderate compression that doesn't reduce image quality.
 		</constant>
 		<constant name="SIDE_LEFT" value="0" enum="Side">
+			Identifier for the left face of the [code]CubeMap[/code].
 		</constant>
 		<constant name="SIDE_RIGHT" value="1" enum="Side">
+			Identifier for the right face of the [code]CubeMap[/code].
 		</constant>
 		<constant name="SIDE_BOTTOM" value="2" enum="Side">
+			Identifier for the bottom face of the [code]CubeMap[/code].
 		</constant>
 		<constant name="SIDE_TOP" value="3" enum="Side">
+			Identifier for the top face of the [code]CubeMap[/code].
 		</constant>
 		<constant name="SIDE_FRONT" value="4" enum="Side">
+			Identifier for the front face of the [code]CubeMap[/code].
 		</constant>
 		<constant name="SIDE_BACK" value="5" enum="Side">
+			Identifier for the back face of the [code]CubeMap[/code].
 		</constant>
 		<constant name="FLAG_MIPMAPS" value="1" enum="Flags">
+			Generate mipmaps, to enable smooth zooming out of the texture.
 		</constant>
 		<constant name="FLAG_REPEAT" value="2" enum="Flags">
+			Repeat (instead of clamp to edge).
 		</constant>
 		<constant name="FLAG_FILTER" value="4" enum="Flags">
+			Turn on magnifying filter, to enable smooth zooming in of the texture.
 		</constant>
 		<constant name="FLAGS_DEFAULT" value="7" enum="Flags">
+			Default flags. Generate mipmaps, repeat, and filter are enabled.
 		</constant>
 	</constants>
 </class>
diff --git a/doc/classes/EditorPlugin.xml b/doc/classes/EditorPlugin.xml
index edd1f721b5..ada0ee56a8 100644
--- a/doc/classes/EditorPlugin.xml
+++ b/doc/classes/EditorPlugin.xml
@@ -82,6 +82,14 @@
 			<description>
 			</description>
 		</method>
+		<method name="add_scene_import_plugin">
+			<return type="void">
+			</return>
+			<argument index="0" name="scene_importer" type="EditorSceneImporter">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="add_tool_submenu_item">
 			<return type="void">
 			</return>
@@ -176,6 +184,12 @@
 			<description>
 			</description>
 		</method>
+		<method name="get_plugin_icon" qualifiers="virtual">
+			<return type="Object">
+			</return>
+			<description>
+			</description>
+		</method>
 		<method name="get_plugin_name" qualifiers="virtual">
 			<return type="String">
 			</return>
@@ -295,6 +309,14 @@
 			<description>
 			</description>
 		</method>
+		<method name="remove_scene_import_plugin">
+			<return type="void">
+			</return>
+			<argument index="0" name="scene_importer" type="EditorSceneImporter">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="save_external_data" qualifiers="virtual">
 			<return type="void">
 			</return>
diff --git a/doc/classes/EditorResourceConversionPlugin.xml b/doc/classes/EditorResourceConversionPlugin.xml
index b2bbe69061..8d6aa3a605 100644
--- a/doc/classes/EditorResourceConversionPlugin.xml
+++ b/doc/classes/EditorResourceConversionPlugin.xml
@@ -18,7 +18,7 @@
 			</description>
 		</method>
 		<method name="_converts_to" qualifiers="virtual">
-			<return type="bool">
+			<return type="String">
 			</return>
 			<description>
 			</description>
diff --git a/doc/classes/EditorSceneImporter.xml b/doc/classes/EditorSceneImporter.xml
new file mode 100644
index 0000000000..69fe4050f4
--- /dev/null
+++ b/doc/classes/EditorSceneImporter.xml
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<class name="EditorSceneImporter" inherits="Reference" category="Core" version="3.0-beta">
+	<brief_description>
+	</brief_description>
+	<description>
+	</description>
+	<tutorials>
+	</tutorials>
+	<demos>
+	</demos>
+	<methods>
+		<method name="_get_extensions" qualifiers="virtual">
+			<return type="Array">
+			</return>
+			<description>
+			</description>
+		</method>
+		<method name="_get_import_flags" qualifiers="virtual">
+			<return type="int">
+			</return>
+			<description>
+			</description>
+		</method>
+		<method name="_import_animation" qualifiers="virtual">
+			<return type="Animation">
+			</return>
+			<argument index="0" name="path" type="String">
+			</argument>
+			<argument index="1" name="flags" type="int">
+			</argument>
+			<argument index="2" name="bake_fps" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="_import_scene" qualifiers="virtual">
+			<return type="Node">
+			</return>
+			<argument index="0" name="path" type="String">
+			</argument>
+			<argument index="1" name="flags" type="int">
+			</argument>
+			<argument index="2" name="bake_fps" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="import_animation_from_other_importer">
+			<return type="Animation">
+			</return>
+			<argument index="0" name="path" type="String">
+			</argument>
+			<argument index="1" name="flags" type="int">
+			</argument>
+			<argument index="2" name="bake_fps" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="import_scene_from_other_importer">
+			<return type="Node">
+			</return>
+			<argument index="0" name="path" type="String">
+			</argument>
+			<argument index="1" name="flags" type="int">
+			</argument>
+			<argument index="2" name="bake_fps" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+	</methods>
+	<constants>
+		<constant name="IMPORT_SCENE" value="1">
+		</constant>
+		<constant name="IMPORT_ANIMATION" value="2">
+		</constant>
+		<constant name="IMPORT_ANIMATION_DETECT_LOOP" value="4">
+		</constant>
+		<constant name="IMPORT_ANIMATION_OPTIMIZE" value="8">
+		</constant>
+		<constant name="IMPORT_ANIMATION_FORCE_ALL_TRACKS_IN_ALL_CLIPS" value="16">
+		</constant>
+		<constant name="IMPORT_ANIMATION_KEEP_VALUE_TRACKS" value="32">
+		</constant>
+		<constant name="IMPORT_GENERATE_TANGENT_ARRAYS" value="256">
+		</constant>
+		<constant name="IMPORT_FAIL_ON_MISSING_DEPENDENCIES" value="512">
+		</constant>
+		<constant name="IMPORT_MATERIALS_IN_INSTANCES" value="1024">
+		</constant>
+		<constant name="IMPORT_USE_COMPRESSION" value="2048">
+		</constant>
+	</constants>
+</class>
diff --git a/doc/classes/HTTPClient.xml b/doc/classes/HTTPClient.xml
index b1526b64c5..b90c49b5c0 100644
--- a/doc/classes/HTTPClient.xml
+++ b/doc/classes/HTTPClient.xml
@@ -4,8 +4,10 @@
 		Hyper-text transfer protocol client.
 	</brief_description>
 	<description>
-		Hyper-text transfer protocol client. Supports SSL and SSL server certificate verification.
-		Can be reused to connect to different hosts and make many requests.
+		Hyper-text transfer protocol client (sometimes called "User Agent"). Used to make HTTP requests to download web content, upload files and other data or to communicate with various services, among other use cases.
+		Note that this client only needs to connect to a host once (see [method connect_to_host]) to send multiple requests. Because of this, methods that take URLs usually take just the part after the host instead of the full URL, as the client is already connected to a host. See [method request] for a full example and to get started.
+		An [code]HTTPClient[/code] should be reused between multiple requests or to connect to different hosts instead of creating one client per request. Supports SSL and SSL server certificate verification. HTTP status codes in the 2xx range indicate success, 3xx redirection (i.e. "try again, but over here"), 4xx something was wrong with the request, and 5xx something went wrong on the server's side.
+		For more information on HTTP, see https://developer.mozilla.org/en-US/docs/Web/HTTP (or read RFC 2616 to get it straight from the source: https://tools.ietf.org/html/rfc2616).
 	</description>
 	<tutorials>
 	</tutorials>
@@ -16,7 +18,7 @@
 			<return type="void">
 			</return>
 			<description>
-				Cloces the current connection, allows for reusal of [code]HTTPClient[/code].
+				Closes the current connection, allowing reuse of this [code]HTTPClient[/code].
 			</description>
 		</method>
 		<method name="connect_to_host">
@@ -33,35 +35,35 @@
 			<description>
 				Connect to a host. This needs to be done before any requests are sent.
 				The host should not have http:// prepended but will strip the protocol identifier if provided.
-				verify_host will check the SSL identity of the host if set to true.
+				[code]verify_host[/code] will check the SSL identity of the host if set to [code]true[/code].
 			</description>
 		</method>
 		<method name="get_connection" qualifiers="const">
 			<return type="StreamPeer">
 			</return>
 			<description>
-				Return current connection.
+				Returns the current connection.
 			</description>
 		</method>
 		<method name="get_response_body_length" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
-				Return the response's body length.
+				Returns the response's body length.
 			</description>
 		</method>
 		<method name="get_response_code" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
-				Return the HTTP status code of the response.
+				Returns the response's HTTP status code.
 			</description>
 		</method>
 		<method name="get_response_headers">
 			<return type="PoolStringArray">
 			</return>
 			<description>
-				Return the response headers.
+				Returns the response headers.
 			</description>
 		</method>
 		<method name="get_response_headers_as_dictionary">
@@ -84,21 +86,21 @@
 			<return type="bool">
 			</return>
 			<description>
-				Return whether this [code]HTTPClient[/code] has a response available.
+				If [code]true[/code], this [code]HTTPClient[/code] has a response available.
 			</description>
 		</method>
 		<method name="is_blocking_mode_enabled" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
-				Return whether blocking mode is enabled.
+				If [code]true[/code], blocking mode is enabled.
 			</description>
 		</method>
 		<method name="is_response_chunked" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
-				Return whether this [code]HTTPClient[/code] has a response that is chunked.
+				If [code]true[/code], this [code]HTTPClient[/code] has a response that is chunked.
 			</description>
 		</method>
 		<method name="poll">
@@ -141,8 +143,8 @@
 			<argument index="3" name="body" type="String" default="&quot;&quot;">
 			</argument>
 			<description>
-				Sends a request to the connected host. The url is what is normally behind the hostname, i.e. in [code]http://somehost.com/index.php[/code], url would be "index.php".
-				Headers are HTTP request headers.
+				Sends a request to the connected host. The URL parameter is just the part after the host, so for [code]http://somehost.com/index.php[/code], it is [code]index.php[/code].
+				Headers are HTTP request headers. For available HTTP methods, see [code]METHOD_*[/code].
 				To create a POST request with query strings to push to the server, do:
 				[codeblock]
 				var fields = {"username" : "user", "password" : "pass"}
@@ -164,9 +166,9 @@
 			<argument index="3" name="body" type="PoolByteArray">
 			</argument>
 			<description>
-				Sends a raw request to the connected host. The url is what is normally behind the hostname, i.e. in [code]http://somehost.com/index.php[/code], url would be "index.php".
-				Headers are HTTP request headers.
-				Sends body raw, as a byte array, does not encode it in any way.
+				Sends a raw request to the connected host. The URL parameter is just the part after the host, so for [code]http://somehost.com/index.php[/code], it is [code]index.php[/code].
+				Headers are HTTP request headers. For available HTTP methods, see [code]METHOD_*[/code].
+				Sends the body data raw, as a byte array, and does not encode it in any way.
 			</description>
 		</method>
 		<method name="set_blocking_mode">
@@ -184,7 +186,7 @@
 			<argument index="0" name="connection" type="StreamPeer">
 			</argument>
 			<description>
-				Set connection to use, for this client.
+				Sets connection to use for this client.
 			</description>
 		</method>
 		<method name="set_read_chunk_size">
@@ -199,140 +201,208 @@
 	</methods>
 	<constants>
 		<constant name="METHOD_GET" value="0" enum="Method">
+			HTTP GET method. The GET method requests a representation of the specified resource. Requests using GET should only retrieve data.
 		</constant>
 		<constant name="METHOD_HEAD" value="1" enum="Method">
+			HTTP HEAD method. The HEAD method asks for a response identical to that of a GET request, but without the response body. This is useful to request metadata like HTTP headers or to check if a resource exists.
 		</constant>
 		<constant name="METHOD_POST" value="2" enum="Method">
+			HTTP POST method. The POST method is used to submit an entity to the specified resource, often causing a change in state or side effects on the server. This is often used for forms and submitting data or uploading files.
 		</constant>
 		<constant name="METHOD_PUT" value="3" enum="Method">
+			HTTP PUT method. The PUT method asks to replace all current representations of the target resource with the request payload. (You can think of [code]POST[/code] as "create or update" and [code]PUT[/code] as "update", although many services tend to not make a clear distinction or change their meaning).
 		</constant>
 		<constant name="METHOD_DELETE" value="4" enum="Method">
+			HTTP DELETE method. The DELETE method requests to delete the specified resource.
 		</constant>
 		<constant name="METHOD_OPTIONS" value="5" enum="Method">
+			HTTP OPTIONS method. The OPTIONS method asks for a description of the communication options for the target resource. Rarely used.
 		</constant>
 		<constant name="METHOD_TRACE" value="6" enum="Method">
+			HTTP TRACE method. The TRACE method performs a message loop-back test along the path to the target resource. Returns the entire HTTP request received in the response body. Rarely used.
 		</constant>
 		<constant name="METHOD_CONNECT" value="7" enum="Method">
+			HTTP CONNECT method. The CONNECT method establishes a tunnel to the server identified by the target resource. Rarely used.
 		</constant>
 		<constant name="METHOD_MAX" value="8" enum="Method">
+			Marker for end of [code]METHOD_*[/code] enum. Not used.
 		</constant>
 		<constant name="STATUS_DISCONNECTED" value="0" enum="Status">
+			Status: Disconnected from the server.
 		</constant>
 		<constant name="STATUS_RESOLVING" value="1" enum="Status">
+			Status: Currently resolving the hostname for the given URL into an IP.
 		</constant>
 		<constant name="STATUS_CANT_RESOLVE" value="2" enum="Status">
+			Status: DNS failure: Can't resolve the hostname for the given URL.
 		</constant>
 		<constant name="STATUS_CONNECTING" value="3" enum="Status">
+			Status: Currently connecting to server.
 		</constant>
 		<constant name="STATUS_CANT_CONNECT" value="4" enum="Status">
+			Status: Can't connect to the server.
 		</constant>
 		<constant name="STATUS_CONNECTED" value="5" enum="Status">
+			Status: Connection established.
 		</constant>
 		<constant name="STATUS_REQUESTING" value="6" enum="Status">
+			Status: Currently sending request.
 		</constant>
 		<constant name="STATUS_BODY" value="7" enum="Status">
+			Status: HTTP body received.
 		</constant>
 		<constant name="STATUS_CONNECTION_ERROR" value="8" enum="Status">
+			Status: Error in HTTP connection.
 		</constant>
 		<constant name="STATUS_SSL_HANDSHAKE_ERROR" value="9" enum="Status">
+			Status: Error in SSL handshake.
 		</constant>
 		<constant name="RESPONSE_CONTINUE" value="100" enum="ResponseCode">
+			HTTP status code [code]100 Continue[/code]. Interim response that indicates everything so far is OK and that the client should continue with the request (or ignore this status if already finished).
 		</constant>
 		<constant name="RESPONSE_SWITCHING_PROTOCOLS" value="101" enum="ResponseCode">
+			HTTP status code [code]101 Switching Protocols[/code]. Sent in response to an [code]Upgrade[/code] request header by the client. Indicates the protocol the server is switching to.
 		</constant>
 		<constant name="RESPONSE_PROCESSING" value="102" enum="ResponseCode">
+			HTTP status code [code]102 Processing[/code] (WebDAV). Indicates that the server has received and is processing the request, but no response is available yet.
 		</constant>
 		<constant name="RESPONSE_OK" value="200" enum="ResponseCode">
+			HTTP status code [code]200 OK[/code]. The request has succeeded. Default response for successful requests. Meaning varies depending on the request. GET: The resource has been fetched and is transmitted in the message body. HEAD: The entity headers are sent in the response, without any message body. POST: The resource describing the result of the action is transmitted in the message body. TRACE: The message body contains the request message as received by the server.
 		</constant>
 		<constant name="RESPONSE_CREATED" value="201" enum="ResponseCode">
+			HTTP status code [code]201 Created[/code]. The request has succeeded and a new resource has been created as a result of it. This is typically the response sent after a PUT request.
 		</constant>
 		<constant name="RESPONSE_ACCEPTED" value="202" enum="ResponseCode">
+			HTTP status code [code]202 Accepted[/code]. The request has been received but not yet acted upon. It is non-committal, meaning that there is no way in HTTP to later send an asynchronous response indicating the outcome of processing the request. It is intended for cases where another process or server handles the request, or for batch processing.
 		</constant>
 		<constant name="RESPONSE_NON_AUTHORITATIVE_INFORMATION" value="203" enum="ResponseCode">
+			HTTP status code [code]203 Non-Authoritative Information[/code]. This response code means the returned meta-information is not exactly the same as is available from the origin server, but was collected from a local or a third-party copy. Except for this condition, a 200 OK response should be preferred over this response.
 		</constant>
 		<constant name="RESPONSE_NO_CONTENT" value="204" enum="ResponseCode">
+			HTTP status code [code]204 No Content[/code]. There is no content to send for this request, but the headers may be useful. The user-agent may update its cached headers for this resource with the new ones.
 		</constant>
 		<constant name="RESPONSE_RESET_CONTENT" value="205" enum="ResponseCode">
+			HTTP status code [code]205 Reset Content[/code]. The server has fulfilled the request and desires that the client resets the "document view" that caused the request to be sent to its original state as received from the origin server.
 		</constant>
 		<constant name="RESPONSE_PARTIAL_CONTENT" value="206" enum="ResponseCode">
+			HTTP status code [code]206 Partial Content[/code]. This response code is used because of a range header sent by the client to separate download into multiple streams.
 		</constant>
 		<constant name="RESPONSE_MULTI_STATUS" value="207" enum="ResponseCode">
+			HTTP status code [code]207 Multi-Status[/code] (WebDAV). A Multi-Status response conveys information about multiple resources in situations where multiple status codes might be appropriate.
 		</constant>
 		<constant name="RESPONSE_IM_USED" value="226" enum="ResponseCode">
+			HTTP status code [code]226 IM Used[/code] (HTTP delta encoding). The server has fulfilled a GET request for the resource, and the response is a representation of the result of one or more instance-manipulations applied to the current instance.
 		</constant>
 		<constant name="RESPONSE_MULTIPLE_CHOICES" value="300" enum="ResponseCode">
+			HTTP status code [code]300 Multiple Choices[/code]. The request has more than one possible response and there is no standardized way to choose one of the responses. The user agent or the user should choose one of them.
 		</constant>
 		<constant name="RESPONSE_MOVED_PERMANENTLY" value="301" enum="ResponseCode">
+			HTTP status code [code]301 Moved Permanently[/code]. Redirection. This response code means the URI of the requested resource has been changed. The new URI is usually included in the response.
 		</constant>
 		<constant name="RESPONSE_FOUND" value="302" enum="ResponseCode">
+			HTTP status code [code]302 Found[/code]. Temporary redirection. This response code means the URI of the requested resource has been changed temporarily. Further changes to the URI might be made in the future. Therefore, the client should keep using the same original URI in future requests.
 		</constant>
 		<constant name="RESPONSE_SEE_OTHER" value="303" enum="ResponseCode">
+			HTTP status code [code]303 See Other[/code]. The server is redirecting the user agent to a different resource, as indicated by a URI in the Location header field, which is intended to provide an indirect response to the original request.
 		</constant>
 		<constant name="RESPONSE_NOT_MODIFIED" value="304" enum="ResponseCode">
+			HTTP status code [code]304 Not Modified[/code]. A conditional GET or HEAD request has been received and would have resulted in a 200 OK response if it were not for the fact that the condition evaluated to false.
 		</constant>
 		<constant name="RESPONSE_USE_PROXY" value="305" enum="ResponseCode">
+			HTTP status code [code]305 Use Proxy[/code]. Deprecated. Do not use.
 		</constant>
 		<constant name="RESPONSE_TEMPORARY_REDIRECT" value="307" enum="ResponseCode">
+			HTTP status code [code]307 Temporary Redirect[/code]. The target resource resides temporarily under a different URI and the user agent MUST NOT change the request method if it performs an automatic redirection to that URI.
 		</constant>
 		<constant name="RESPONSE_BAD_REQUEST" value="400" enum="ResponseCode">
+			HTTP status code [code]400 Bad Request[/code]. The request was invalid. The server cannot or will not process the request due to something that is perceived to be a client error (e.g., malformed request syntax, invalid request message framing, invalid request contents, or deceptive request routing).
 		</constant>
 		<constant name="RESPONSE_UNAUTHORIZED" value="401" enum="ResponseCode">
+			HTTP status code [code]401 Unauthorized[/code]. Credentials required. The request has not been applied because it lacks valid authentication credentials for the target resource.
 		</constant>
 		<constant name="RESPONSE_PAYMENT_REQUIRED" value="402" enum="ResponseCode">
+			HTTP status code [code]402 Payment Required[/code]. This response code is reserved for future use. It was originally intended for digital payment systems, but it is not currently used.
 		</constant>
 		<constant name="RESPONSE_FORBIDDEN" value="403" enum="ResponseCode">
+			HTTP status code [code]403 Forbidden[/code]. The client does not have access rights to the content, i.e. they are unauthorized, so the server is refusing to give a proper response. Unlike [code]401[/code], the client's identity is known to the server.
 		</constant>
 		<constant name="RESPONSE_NOT_FOUND" value="404" enum="ResponseCode">
+			HTTP status code [code]404 Not Found[/code]. The server cannot find the requested resource. Either the URL is not recognized or the endpoint is valid but the resource itself does not exist. May also be sent instead of 403 to hide the existence of a resource if the client is not authorized.
 		</constant>
 		<constant name="RESPONSE_METHOD_NOT_ALLOWED" value="405" enum="ResponseCode">
+			HTTP status code [code]405 Method Not Allowed[/code]. The request's HTTP method is known by the server but has been disabled and cannot be used. For example, an API may forbid DELETE-ing a resource. The two mandatory methods, GET and HEAD, must never be disabled and should not return this error code.
 		</constant>
 		<constant name="RESPONSE_NOT_ACCEPTABLE" value="406" enum="ResponseCode">
+			HTTP status code [code]406 Not Acceptable[/code]. The target resource does not have a current representation that would be acceptable to the user agent, according to the proactive negotiation header fields received in the request. Used when negotiating content.
 		</constant>
 		<constant name="RESPONSE_PROXY_AUTHENTICATION_REQUIRED" value="407" enum="ResponseCode">
+			HTTP status code [code]407 Proxy Authentication Required[/code]. Similar to 401 Unauthorized, but it indicates that the client needs to authenticate itself in order to use a proxy.
 		</constant>
 		<constant name="RESPONSE_REQUEST_TIMEOUT" value="408" enum="ResponseCode">
+			HTTP status code [code]408 Request Timeout[/code]. The server did not receive a complete request message within the time that it was prepared to wait.
 		</constant>
 		<constant name="RESPONSE_CONFLICT" value="409" enum="ResponseCode">
+			HTTP status code [code]409 Conflict[/code]. The request could not be completed due to a conflict with the current state of the target resource. This code is used in situations where the user might be able to resolve the conflict and resubmit the request.
 		</constant>
 		<constant name="RESPONSE_GONE" value="410" enum="ResponseCode">
+			HTTP status code [code]410 Gone[/code]. The target resource is no longer available at the origin server and this condition is likely permanent.
 		</constant>
 		<constant name="RESPONSE_LENGTH_REQUIRED" value="411" enum="ResponseCode">
+			HTTP status code [code]411 Length Required[/code]. The server refuses to accept the request without a defined Content-Length header.
 		</constant>
 		<constant name="RESPONSE_PRECONDITION_FAILED" value="412" enum="ResponseCode">
+			HTTP status code [code]412 Precondition Failed[/code]. One or more conditions given in the request header fields evaluated to false when tested on the server.
 		</constant>
 		<constant name="RESPONSE_REQUEST_ENTITY_TOO_LARGE" value="413" enum="ResponseCode">
+			HTTP status code [code]413 Request Entity Too Large[/code]. The server is refusing to process a request because the request payload is larger than the server is willing or able to process.
 		</constant>
 		<constant name="RESPONSE_REQUEST_URI_TOO_LONG" value="414" enum="ResponseCode">
+			HTTP status code [code]414 Request-URI Too Long[/code]. The server is refusing to service the request because the request-target is longer than the server is willing to interpret.
 		</constant>
 		<constant name="RESPONSE_UNSUPPORTED_MEDIA_TYPE" value="415" enum="ResponseCode">
+			HTTP status code [code]415 Unsupported Media Type[/code]. The origin server is refusing to service the request because the payload is in a format not supported by this method on the target resource.
 		</constant>
 		<constant name="RESPONSE_REQUESTED_RANGE_NOT_SATISFIABLE" value="416" enum="ResponseCode">
+			HTTP status code [code]416 Requested Range Not Satisfiable[/code]. None of the ranges in the request's Range header field overlap the current extent of the selected resource or the set of ranges requested has been rejected due to invalid ranges or an excessive request of small or overlapping ranges.
 		</constant>
 		<constant name="RESPONSE_EXPECTATION_FAILED" value="417" enum="ResponseCode">
+			HTTP status code [code]417 Expectation Failed[/code]. The expectation given in the request's Expect header field could not be met by at least one of the inbound servers.
 		</constant>
 		<constant name="RESPONSE_UNPROCESSABLE_ENTITY" value="422" enum="ResponseCode">
+			HTTP status code [code]422 Unprocessable Entity[/code] (WebDAV). The server understands the content type of the request entity (hence a 415 Unsupported Media Type status code is inappropriate), and the syntax of the request entity is correct (thus a 400 Bad Request status code is inappropriate) but was unable to process the contained instructions.
 		</constant>
 		<constant name="RESPONSE_LOCKED" value="423" enum="ResponseCode">
+			HTTP status code [code]423 Locked[/code] (WebDAV). The source or destination resource of a method is locked.
 		</constant>
 		<constant name="RESPONSE_FAILED_DEPENDENCY" value="424" enum="ResponseCode">
+			HTTP status code [code]424 Failed Dependency[/code] (WebDAV). The method could not be performed on the resource because the requested action depended on another action and that action failed.
 		</constant>
 		<constant name="RESPONSE_UPGRADE_REQUIRED" value="426" enum="ResponseCode">
+			HTTP status code [code]426 Upgrade Required[/code]. The server refuses to perform the request using the current protocol but might be willing to do so after the client upgrades to a different protocol.
 		</constant>
 		<constant name="RESPONSE_INTERNAL_SERVER_ERROR" value="500" enum="ResponseCode">
+			HTTP status code [code]500 Internal Server Error[/code]. The server encountered an unexpected condition that prevented it from fulfilling the request.
 		</constant>
 		<constant name="RESPONSE_NOT_IMPLEMENTED" value="501" enum="ResponseCode">
+			HTTP status code [code]501 Not Implemented[/code]. The server does not support the functionality required to fulfill the request.
 		</constant>
 		<constant name="RESPONSE_BAD_GATEWAY" value="502" enum="ResponseCode">
+			HTTP status code [code]502 Bad Gateway[/code]. The server, while acting as a gateway or proxy, received an invalid response from an inbound server it accessed while attempting to fulfill the request. Usually returned by load balancers or proxies.
 		</constant>
 		<constant name="RESPONSE_SERVICE_UNAVAILABLE" value="503" enum="ResponseCode">
+			HTTP status code [code]503 Service Unavailable[/code]. The server is currently unable to handle the request due to a temporary overload or scheduled maintenance, which will likely be alleviated after some delay. Try again later.
 		</constant>
 		<constant name="RESPONSE_GATEWAY_TIMEOUT" value="504" enum="ResponseCode">
+			HTTP status code [code]504 Gateway Timeout[/code]. The server, while acting as a gateway or proxy, did not receive a timely response from an upstream server it needed to access in order to complete the request. Usually returned by load balancers or proxies.
 		</constant>
 		<constant name="RESPONSE_HTTP_VERSION_NOT_SUPPORTED" value="505" enum="ResponseCode">
+			HTTP status code [code]505 HTTP Version Not Supported[/code]. The server does not support, or refuses to support, the major version of HTTP that was used in the request message.
 		</constant>
 		<constant name="RESPONSE_INSUFFICIENT_STORAGE" value="507" enum="ResponseCode">
+			HTTP status code [code]507 Insufficient Storage[/code]. The method could not be performed on the resource because the server is unable to store the representation needed to successfully complete the request.
 		</constant>
 		<constant name="RESPONSE_NOT_EXTENDED" value="510" enum="ResponseCode">
+			HTTP status code [code]510 Not Extended[/code]. The policy for accessing the resource has not been met in the request. The server should send back all the information necessary for the client to issue an extended request.
 		</constant>
 	</constants>
 </class>
diff --git a/doc/classes/HTTPRequest.xml b/doc/classes/HTTPRequest.xml
index 7c37479295..985198c76f 100644
--- a/doc/classes/HTTPRequest.xml
+++ b/doc/classes/HTTPRequest.xml
@@ -1,11 +1,11 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="HTTPRequest" inherits="Node" category="Core" version="3.0-beta">
 	<brief_description>
-		A Node with the ability to send HTTP requests.
+		A node with the ability to send HTTP requests.
 	</brief_description>
 	<description>
-		A Node with the ability to send HTTP requests. Uses a [HTTPClient] internally, supports HTTPS.
-		Can be used to make HTTP requests or download files via HTTP.
+		A node with the ability to send HTTP requests. Uses [HTTPClient] internally.
+		Can be used to make HTTP requests, e.g. to download or upload files or web content via HTTP.
 	</description>
 	<tutorials>
 	</tutorials>
@@ -16,35 +16,35 @@
 			<return type="void">
 			</return>
 			<description>
-				Cancel the current request.
+				Cancels the current request.
 			</description>
 		</method>
 		<method name="get_body_size" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
-				Return the response body length.
+				Returns the response body length.
 			</description>
 		</method>
 		<method name="get_download_file" qualifiers="const">
 			<return type="String">
 			</return>
 			<description>
-				Return the file this request will download into.
+				Returns the file this request will download into.
 			</description>
 		</method>
 		<method name="get_downloaded_bytes" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
-				Return the amount of bytes this HTTPRequest downloaded.
+				Returns the amount of bytes this HTTPRequest downloaded.
 			</description>
 		</method>
 		<method name="get_http_client_status" qualifiers="const">
 			<return type="int" enum="HTTPClient.Status">
 			</return>
 			<description>
-				Return the current status of the underlying [HTTPClient].
+				Returns the current status of the underlying [HTTPClient]. See [code]STATUS_*[/code] enum on [HTTPClient].
 			</description>
 		</method>
 		<method name="request">
@@ -69,16 +69,19 @@
 			<argument index="0" name="path" type="String">
 			</argument>
 			<description>
-				Set the file to download into. Outputs the response body into the file.
+				Sets the file to download into. Outputs the response body into the file if set.
 			</description>
 		</method>
 	</methods>
 	<members>
 		<member name="body_size_limit" type="int" setter="set_body_size_limit" getter="get_body_size_limit">
+			Maximum allowed size for response bodies.
 		</member>
 		<member name="max_redirects" type="int" setter="set_max_redirects" getter="get_max_redirects">
+			Maximum number of allowed redirects.
 		</member>
 		<member name="use_threads" type="bool" setter="set_use_threads" getter="is_using_threads">
+			If [code]true[/code], multithreading is used to improve performance.
 		</member>
 	</members>
 	<signals>
@@ -121,7 +124,7 @@
 			Request exceeded its maximum size limit, see [method set_body_size_limit].
 		</constant>
 		<constant name="RESULT_REQUEST_FAILED" value="8" enum="Result">
-			Request failed. (unused)
+			Request failed. (Unused)
 		</constant>
 		<constant name="RESULT_DOWNLOAD_FILE_CANT_OPEN" value="9" enum="Result">
 			HTTPRequest couldn't open the download file.
diff --git a/doc/classes/IP.xml b/doc/classes/IP.xml
index 09734e746c..cd669e3de1 100644
--- a/doc/classes/IP.xml
+++ b/doc/classes/IP.xml
@@ -1,10 +1,10 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="IP" inherits="Object" category="Core" version="3.0-beta">
 	<brief_description>
-		IP Protocol support functions.
+		Internet protocol (IP) support functions like DNS resolution.
 	</brief_description>
 	<description>
-		IP contains support functions for the IPv4 protocol. TCP/IP support is in different classes (see [StreamPeerTCP] and [TCP_Server]). IP provides hostname resolution support, both blocking and threaded.
+		IP contains support functions for the Internet Protocol (IP). TCP/IP support is in different classes (see [StreamPeerTCP] and [TCP_Server]). IP provides DNS hostname resolution support, both blocking and threaded.
 	</description>
 	<tutorials>
 	</tutorials>
@@ -79,24 +79,34 @@
 	</methods>
 	<constants>
 		<constant name="RESOLVER_STATUS_NONE" value="0" enum="ResolverStatus">
+			DNS hostname resolver status: No status.
 		</constant>
 		<constant name="RESOLVER_STATUS_WAITING" value="1" enum="ResolverStatus">
+			DNS hostname resolver status: Waiting.
 		</constant>
 		<constant name="RESOLVER_STATUS_DONE" value="2" enum="ResolverStatus">
+			DNS hostname resolver status: Done.
 		</constant>
 		<constant name="RESOLVER_STATUS_ERROR" value="3" enum="ResolverStatus">
+			DNS hostname resolver status: Error.
 		</constant>
 		<constant name="RESOLVER_MAX_QUERIES" value="32">
+			Maximum number of concurrent DNS resolver queries allowed. [code]RESOLVER_INVALID_ID[/code] is returned if exceeded.
 		</constant>
 		<constant name="RESOLVER_INVALID_ID" value="-1">
+			Invalid ID constant. Returned if [code]RESOLVER_MAX_QUERIES[/code] is exceeded.
 		</constant>
 		<constant name="TYPE_NONE" value="0" enum="Type">
+			Address type: None.
 		</constant>
 		<constant name="TYPE_IPV4" value="1" enum="Type">
+			Address type: Internet protocol version 4 (IPv4).
 		</constant>
 		<constant name="TYPE_IPV6" value="2" enum="Type">
+			Address type: Internet protocol version 6 (IPv6).
 		</constant>
 		<constant name="TYPE_ANY" value="3" enum="Type">
+			Address type: Any.
 		</constant>
 	</constants>
 </class>
diff --git a/doc/classes/IP_Unix.xml b/doc/classes/IP_Unix.xml
index 28e8a3804b..1d376c7233 100644
--- a/doc/classes/IP_Unix.xml
+++ b/doc/classes/IP_Unix.xml
@@ -1,8 +1,10 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="IP_Unix" inherits="IP" category="Core" version="3.0-beta">
 	<brief_description>
+		Unix IP support. See [IP].
 	</brief_description>
 	<description>
+		Unix-specific implementation of IP support functions. See [IP].
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/KinematicBody.xml b/doc/classes/KinematicBody.xml
index c5204fd0bf..d1dc236d40 100644
--- a/doc/classes/KinematicBody.xml
+++ b/doc/classes/KinematicBody.xml
@@ -102,6 +102,18 @@
 		</method>
 	</methods>
 	<members>
+		<member name="axis_lock_angular_x" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_angular_y" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_angular_z" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_linear_x" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_linear_y" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_linear_z" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
 		<member name="collision/safe_margin" type="float" setter="set_safe_margin" getter="get_safe_margin">
 			If the body is at least this close to another body, this body will consider them to be colliding.
 		</member>
diff --git a/doc/classes/Mesh.xml b/doc/classes/Mesh.xml
index c681886546..465b68171e 100644
--- a/doc/classes/Mesh.xml
+++ b/doc/classes/Mesh.xml
@@ -49,6 +49,10 @@
 			</description>
 		</method>
 	</methods>
+	<members>
+		<member name="lightmap_size_hint" type="Vector2" setter="set_lightmap_size_hint" getter="get_lightmap_size_hint">
+		</member>
+	</members>
 	<constants>
 		<constant name="PRIMITIVE_POINTS" value="0" enum="PrimitiveType">
 			Render array as points (one vertex equals one point).
diff --git a/doc/classes/MeshInstance.xml b/doc/classes/MeshInstance.xml
index ac26c2946f..9273e87430 100644
--- a/doc/classes/MeshInstance.xml
+++ b/doc/classes/MeshInstance.xml
@@ -22,7 +22,7 @@
 			<return type="void">
 			</return>
 			<description>
-		                This helper creates a [MeshInstance] child node with gizmos at every vertex calculated from the mesh geometry. It's mainly used for testing.
+				This helper creates a [MeshInstance] child node with gizmos at every vertex calculated from the mesh geometry. It's mainly used for testing.
 			</description>
 		</method>
 		<method name="create_trimesh_collision">
diff --git a/doc/classes/Node.xml b/doc/classes/Node.xml
index e2198c3e15..a36587c606 100644
--- a/doc/classes/Node.xml
+++ b/doc/classes/Node.xml
@@ -1,19 +1,19 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="Node" inherits="Object" category="Core" version="3.0-beta">
 	<brief_description>
-		Base class for all the [i]scene[/i] elements.
+		Base class for all [i]scene[/i] objects.
 	</brief_description>
 	<description>
-		Nodes are the base bricks with which Godot games are developed. They can be set as children of other nodes, resulting in a tree arrangement. A given node can contain any number of nodes as children (but there is only one scene tree root node) with the requirement that all siblings (direct children of a node) should have unique names.
-		Any tree of nodes is called a [i]scene[/i]. Scenes can be saved to the disk and then instanced into other scenes. This allows for very high flexibility in the architecture and data model of the projects. Nodes can optionally be added to groups. This makes it easy to reach a number of nodes from the code (for example an "enemies" group) to perform grouped actions.
-		[b]Scene tree:[/b] The [SceneTree] contains the active tree of nodes. When a node is added to the scene tree, it receives the NOTIFICATION_ENTER_TREE notification and its [method _enter_tree] callback is triggered. Children nodes are always added [i]after[/i] their parent node, i.e. the [method _enter_tree] callback of a parent node will be triggered before its child's.
-		Once all nodes have been added in the scene tree, they receive the NOTIFICATION_READY notification and their respective [method _ready] callbacks are triggered. For groups of nodes, the [method _ready] callback is called in reverse order, from the children up to the parent nodes.
-		It means that when adding a scene to the scene tree, the following order will be used for the callbacks: [method _enter_tree] of the parent, [method _enter_tree] of the children, [method _ready] of the children and finally [method _ready] of the parent (and that recursively for the whole scene).
-		[b]Processing:[/b] Nodes can be set to the "process" state, so that they receive a callback on each frame requesting them to process (do something). Normal processing (callback [method _process], toggled with [method set_process]) happens as fast as possible and is dependent on the frame rate, so the processing time [i]delta[/i] is variable. Physics processing (callback [method _physics_process], toggled with [method set_physics_process]) happens a fixed amount of times per second (by default 60) and is useful to link itself to the physics.
-		Nodes can also process input events. When set, the [method _input] function will be called for each input that the program receives. In many cases, this can be overkill (unless used for simple projects), and the [method _unhandled_input] function might be preferred; it is called when the input event was not handled by anyone else (typically, GUI [Control] nodes), ensuring that the node only receives the events that were meant for it.
+		Nodes are Godot's building blocks. They can be assigned as the child of another node, resulting in a tree arrangement. A given node can contain any number of nodes as children with the requirement that all siblings (direct children of a node) should have unique names.
+		A tree of nodes is called a [i]scene[/i]. Scenes can be saved to the disk and then instanced into other scenes. This allows for very high flexibility in the architecture and data model of Godot projects. Nodes can also optionally be added to groups. This makes it possible to access a number of nodes from code (an "enemies" group, for example) to perform grouped actions.
+		[b]Scene tree:[/b] The [SceneTree] contains the active tree of nodes. When a node is added to the scene tree, it receives the NOTIFICATION_ENTER_TREE notification and its [method _enter_tree] callback is triggered. Child nodes are always added [i]after[/i] their parent node, i.e. the [method _enter_tree] callback of a parent node will be triggered before its child's.
+		Once all nodes have been added in the scene tree, they receive the NOTIFICATION_READY notification and their respective [method _ready] callbacks are triggered. For groups of nodes, the [method _ready] callback is called in reverse order, starting with the children and moving up to the parent nodes.
+		This means that when adding a node to the scene tree, the following order will be used for the callbacks: [method _enter_tree] of the parent, [method _enter_tree] of the children, [method _ready] of the children and finally [method _ready] of the parent (recursively for the entire scene tree).
+		[b]Processing:[/b] Nodes can override the "process" state, so that they receive a callback on each frame requesting them to process (do something). Normal processing (callback [method _process], toggled with [method set_process]) happens as fast as possible and is dependent on the frame rate, so the processing time [i]delta[/i] is passed as an argument. Physics processing (callback [method _physics_process], toggled with [method set_physics_process]) happens a fixed number of times per second (60 by default) and is useful for code related to the physics engine.
+		Nodes can also process input events. When present, the [method _input] function will be called for each input that the program receives. In many cases, this can be overkill (unless used for simple projects), and the [method _unhandled_input] function might be preferred; it is called when the input event was not handled by anyone else (typically, GUI [Control] nodes), ensuring that the node only receives the events that were meant for it.
 		To keep track of the scene hierarchy (especially when instancing scenes into other scenes), an "owner" can be set for the node with [method set_owner]. This keeps track of who instanced what. This is mostly useful when writing editors and tools, though.
 		Finally, when a node is freed with [method free] or [method queue_free], it will also free all its children.
-		[b]Networking with nodes:[/b] After connecting to a server (or making one, see [NetworkedMultiplayerENet]) it is possible to use the built-in RPC (remote procedure call) system to easily communicate over the network. By calling [method rpc] with a method name, it will be called locally, and in all connected peers (peers = clients and the server that accepts connections), with behaviour varying depending on the network mode ([method set_network_mode]) on the receiving peer. To identify which [code]Node[/code] receives the RPC call Godot will use its [NodePath] (make sure node names are the same on all peers).
+		[b]Networking with nodes:[/b] After connecting to a server (or making one, see [NetworkedMultiplayerENet]) it is possible to use the built-in RPC (remote procedure call) system to communicate over the network. By calling [method rpc] with a method name, it will be called locally and in all connected peers (peers = clients and the server that accepts connections), with behaviour varying depending on the network mode ([method set_network_mode]) of the receiving peer. To identify which node receives the RPC call, Godot will use its [NodePath] (make sure node names are the same on all peers).
 	</description>
 	<tutorials>
 	</tutorials>
@@ -24,7 +24,7 @@
 			<return type="void">
 			</return>
 			<description>
-				Called when the node enters the [SceneTree] (e.g. upon instancing, scene changing or after calling [method add_child] in a script). If the node has children, its [method _enter_tree] callback will be called first, and then that of the children.
+				Called when the node enters the [SceneTree] (e.g. upon instancing, scene changing, or after calling [method add_child] in a script). If the node has children, its [method _enter_tree] callback will be called first, and then that of the children.
 				Corresponds to the NOTIFICATION_ENTER_TREE notification in [method Object._notification].
 			</description>
 		</method>
@@ -32,7 +32,7 @@
 			<return type="void">
 			</return>
 			<description>
-				Called when the node leaves the [SceneTree] (e.g. upon freeing, scene changing or after calling [method remove_child] in a script). If the node has children, its [method _exit_tree] callback will be called last, after all its children have left the tree.
+				Called when the node leaves the [SceneTree] (e.g. upon freeing, scene changing, or after calling [method remove_child] in a script). If the node has children, its [method _exit_tree] callback will be called last, after all its children have left the tree.
 				Corresponds to the NOTIFICATION_EXIT_TREE notification in [method Object._notification].
 			</description>
 		</method>
@@ -42,7 +42,7 @@
 			<argument index="0" name="event" type="InputEvent">
 			</argument>
 			<description>
-				Called when there is a change to input devices. Propagated through the node tree until a Node consumes it.
+				Called when there is an input event. The input event propagates through the node tree until a node consumes it.
 				It is only called if input processing is enabled, which is done automatically if this method is overriden, and can be toggled with [method set_process_input].
 			</description>
 		</method>
@@ -72,7 +72,7 @@
 			<return type="void">
 			</return>
 			<description>
-				Called when the node is "ready", i.e. when both the node and its children have entered the scene tree. If the node has children, their [method _ready] callback gets triggered first, and the node will receive the ready notification only afterwards.
+				Called when the node is "ready", i.e. when both the node and its children have entered the scene tree. If the node has children, their [method _ready] callbacks get triggered first, and the parent node will receive the ready notification afterwards.
 				Corresponds to the NOTIFICATION_READY notification in [method Object._notification].
 			</description>
 		</method>
@@ -102,8 +102,8 @@
 			<argument index="1" name="legible_unique_name" type="bool" default="false">
 			</argument>
 			<description>
-				Add a child [code]Node[/code]. Nodes can have as many children as they want, but every child must have a unique name. Children nodes are automatically deleted when the parent node is deleted, so deleting a whole scene is performed by deleting its topmost node.
-				The optional boolean argument enforces creating child nodes with human-readable names, based on the name of the node being instanced instead of its type only.
+				Adds a child node. Nodes can have any number of children, but every child must have a unique name. Child nodes are automatically deleted when the parent node is deleted, so an entire scene can be removed by deleting its topmost node.
+				Setting "legible_unique_name" to [code]true[/code] creates child nodes with human-readable names, based on the name of the node being instanced instead of its type.
 			</description>
 		</method>
 		<method name="add_child_below_node">
@@ -116,6 +116,8 @@
 			<argument index="2" name="legible_unique_name" type="bool" default="false">
 			</argument>
 			<description>
+				Adds a child node. The child is placed below the given node in the list of children.
+				Setting "legible_unique_name" to [code]true[/code] creates child nodes with human-readable names, based on the name of the node being instanced instead of its type.
 			</description>
 		</method>
 		<method name="add_to_group">
@@ -126,14 +128,14 @@
 			<argument index="1" name="persistent" type="bool" default="false">
 			</argument>
 			<description>
-				Add a node to a group. Groups are helpers to name and organize a subset of nodes, like for example "enemies" or "collectables". A [code]Node[/code] can be in any number of groups. Nodes can be assigned a group at any time, but will not be added to it until they are inside the scene tree (see [method is_inside_tree]).
+				Adds the node to a group. Groups are helpers to name and organize a subset of nodes, for example "enemies" or "collectables". A node can be in any number of groups. Nodes can be assigned a group at any time, but will not be added until they are inside the scene tree (see [method is_inside_tree]).
 			</description>
 		</method>
 		<method name="can_process" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
-				Return true if the node can process, i.e. whether its pause mode allows processing while the scene tree is paused (see [method set_pause_mode]). Always returns true if the scene tree is not paused, and false if the node is not in the tree. FIXME: Why FAIL_COND?
+				Returns [code]true[/code] if the node can process while the scene tree is paused (see [method set_pause_mode]). Always returns [code]true[/code] if the scene tree is not paused, and [code]false[/code] if the node is not in the tree. FIXME: Why FAIL_COND?
 			</description>
 		</method>
 		<method name="duplicate" qualifiers="const">
@@ -142,8 +144,8 @@
 			<argument index="0" name="flags" type="int" default="15">
 			</argument>
 			<description>
-				Duplicate the node, returning a new [code]Node[/code].
-				You can fine-tune the behavior using the [code]flags[/code], which are based on the DUPLICATE_* constants.
+				Duplicates the node, returning a new node.
+				You can fine-tune the behavior using the [code]flags[/code]. See DUPLICATE_* constants.
 			</description>
 		</method>
 		<method name="find_node" qualifiers="const">
@@ -172,35 +174,35 @@
 			<return type="int">
 			</return>
 			<description>
-				Returns the amount of child nodes.
+				Returns the number of child nodes.
 			</description>
 		</method>
 		<method name="get_children" qualifiers="const">
 			<return type="Array">
 			</return>
 			<description>
-				Returns an array of references ([code]Node[/code]) to the child nodes.
+				Returns an array of references to the node's children.
 			</description>
 		</method>
 		<method name="get_filename" qualifiers="const">
 			<return type="String">
 			</return>
 			<description>
-				Returns a filename that may be contained by the node. When a scene is instanced from a file, it topmost node contains the filename from where it was loaded (see [method set_filename]).
+				Returns a filename that may be contained by the node. When a scene is instanced from a file, its topmost node contains the filename from which it was loaded (see [method set_filename]).
 			</description>
 		</method>
 		<method name="get_groups" qualifiers="const">
 			<return type="Array">
 			</return>
 			<description>
-				Returns an array listing the groups that the node is part of.
+				Returns an array listing the groups that the node is a member of.
 			</description>
 		</method>
 		<method name="get_index" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
-				Returns the node index, i.e. its position among the siblings of its parent.
+				Returns the node's index, i.e. its position among the siblings of its parent.
 			</description>
 		</method>
 		<method name="get_name" qualifiers="const">
@@ -223,7 +225,7 @@
 			<argument index="0" name="path" type="NodePath">
 			</argument>
 			<description>
-				Fetches a node. The [NodePath] must be valid (or else an error will be raised) and can be either the path to child node, a relative path (from the current node to another node), or an absolute path to a node.
+				Fetches a node. The [NodePath] can be either a relative path (from the current node) or an absolute path (in the scene tree) to a node. If the path does not exist, a [code]null instance[/code] is returned and attempts to access it will result in an "Attempt to call &lt;method&gt; on a null instance." error.
 				Note: fetching absolute paths only works when the node is inside the scene tree (see [method is_inside_tree]).
 				[i]Example:[/i] Assume your current node is Character and the following tree:
 				[codeblock]
@@ -280,7 +282,7 @@
 			<argument index="0" name="node" type="Node">
 			</argument>
 			<description>
-				Returns the relative path from the current node to the specified node in "node" argument. Both nodes must be in the same scene, or else the function will fail.
+				Returns the relative path from the current node to the specified node in "node" argument. Both nodes must be in the same scene, or the function will fail.
 			</description>
 		</method>
 		<method name="get_physics_process_delta_time" qualifiers="const">
@@ -294,14 +296,14 @@
 			<return type="int">
 			</return>
 			<description>
-				Returns the order in the node tree branch, i.e. if called by the first child Node, return 0.
+				Returns the node's order in the scene tree branch. For example, if called on the first child node the position is [code]0[/code].
 			</description>
 		</method>
 		<method name="get_process_delta_time" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
-				Returns the time elapsed (in seconds) since the last process callback. This is almost always different each time.
+				Returns the time elapsed (in seconds) since the last process callback. This value may vary from frame to frame.
 			</description>
 		</method>
 		<method name="get_scene_instance_load_placeholder" qualifiers="const">
@@ -314,14 +316,14 @@
 			<return type="SceneTree">
 			</return>
 			<description>
-				Returns the [SceneTree] that this node is inside.
+				Returns the [SceneTree] that contains this node.
 			</description>
 		</method>
 		<method name="get_viewport" qualifiers="const">
 			<return type="Viewport">
 			</return>
 			<description>
-				Returns the [Viewport] for this node.
+				Returns the node's [Viewport].
 			</description>
 		</method>
 		<method name="has_node" qualifiers="const">
@@ -347,13 +349,14 @@
 			<argument index="0" name="node" type="Node">
 			</argument>
 			<description>
-				Returns [code]true[/code] if the "node" argument is a direct or indirect child of the current node, otherwise return [code]false[code].
+				Returns [code]true[/code] if the given node is a direct or indirect child of the current node.
 			</description>
 		</method>
 		<method name="is_displayed_folded" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
+				Returns [code]true[/code] if the node is folded (collapsed) in the Scene dock.
 			</description>
 		</method>
 		<method name="is_greater_than" qualifiers="const">
@@ -362,7 +365,7 @@
 			<argument index="0" name="node" type="Node">
 			</argument>
 			<description>
-				Returns [code]true[/code] if [code]node[/code] occurs later in the scene hierarchy than the current node, otherwise return [code]false[/code].
+				Returns [code]true[/code] if the given node occurs later in the scene hierarchy than the current node.
 			</description>
 		</method>
 		<method name="is_in_group" qualifiers="const">
@@ -371,14 +374,14 @@
 			<argument index="0" name="group" type="String">
 			</argument>
 			<description>
-				Returns [code]true[/code] if this Node is in the specified group.
+				Returns [code]true[/code] if this node is in the specified group.
 			</description>
 		</method>
 		<method name="is_inside_tree" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
-				Returns [code]true[/code] if this Node is currently inside a [SceneTree].
+				Returns [code]true[/code] if this node is currently inside a [SceneTree].
 			</description>
 		</method>
 		<method name="is_network_master" qualifiers="const">
@@ -468,7 +471,7 @@
 			<argument index="2" name="parent_first" type="bool" default="false">
 			</argument>
 			<description>
-				Calls the method (if present) with the arguments given in "args" on this Node and recursively on all children. If the parent_first argument is true then the method will be called on the current [code]Node[/code] first, then on all children. If it is false then the children will get called first.
+				Calls the given method (if present) with the arguments given in [code]args[/code] on this node and recursively on all its children. If the parent_first argument is [code]true[/code] then the method will be called on the current node first, then on all children. If it is [code]false[/code] then the children will be called first.
 			</description>
 		</method>
 		<method name="propagate_notification">
@@ -477,28 +480,28 @@
 			<argument index="0" name="what" type="int">
 			</argument>
 			<description>
-				Notify the current node and all its children recursively by calling notification() in all of them.
+				Notifies the current node and all its children recursively by calling notification() on all of them.
 			</description>
 		</method>
 		<method name="queue_free">
 			<return type="void">
 			</return>
 			<description>
-				Queues a node for deletion at the end of the current frame. When deleted, all of its children nodes will be deleted as well. This method ensures it's safe to delete the node, contrary to [method Object.free]. Use [method Object.is_queued_for_deletion] to know whether a node will be deleted at the end of the frame.
+				Queues a node for deletion at the end of the current frame. When deleted, all of its child nodes will be deleted as well. This method ensures it's safe to delete the node, contrary to [method Object.free]. Use [method Object.is_queued_for_deletion] to check whether a node will be deleted at the end of the frame.
 			</description>
 		</method>
 		<method name="raise">
 			<return type="void">
 			</return>
 			<description>
-				Moves this node to the top of the array of nodes of the parent node. This is often useful on GUIs ([Control]), because their order of drawing fully depends on their order in the tree.
+				Moves this node to the top of the array of nodes of the parent node. This is often useful in GUIs ([Control] nodes), because their order of drawing depends on their order in the tree.
 			</description>
 		</method>
 		<method name="remove_and_skip">
 			<return type="void">
 			</return>
 			<description>
-				Removes a node and set all its children as children of the parent node (if exists). All even subscriptions that pass by the removed node will be unsubscribed.
+				Removes a node and sets all its children as children of the parent node (if it exists). All event subscriptions that pass by the removed node will be unsubscribed.
 			</description>
 		</method>
 		<method name="remove_child">
@@ -507,7 +510,7 @@
 			<argument index="0" name="node" type="Node">
 			</argument>
 			<description>
-				Removes a child [code]Node[/code]. Node is NOT deleted and will have to be deleted manually.
+				Removes a child node. The node is NOT deleted and must be deleted manually.
 			</description>
 		</method>
 		<method name="remove_from_group">
@@ -527,7 +530,7 @@
 			<argument index="1" name="keep_data" type="bool" default="false">
 			</argument>
 			<description>
-				Replaces a node in a scene by a given one. Subscriptions that pass through this node will be lost.
+				Replaces a node in a scene by the given one. Subscriptions that pass through this node will be lost.
 			</description>
 		</method>
 		<method name="request_ready">
@@ -653,6 +656,7 @@
 			<argument index="0" name="fold" type="bool">
 			</argument>
 			<description>
+				Sets the folded state of the node in the Scene dock.
 			</description>
 		</method>
 		<method name="set_filename">
@@ -661,7 +665,7 @@
 			<argument index="0" name="filename" type="String">
 			</argument>
 			<description>
-				A node can contain a filename. This filename should not be changed by the user, unless writing editors and tools. When a scene is instanced from a file, it topmost node contains the filename from where it was loaded.
+				A node can contain a filename. This filename should not be changed by the user, unless writing editors and tools. When a scene is instanced from a file, its topmost node contains the filename from which it was loaded.
 			</description>
 		</method>
 		<method name="set_name">
@@ -670,7 +674,7 @@
 			<argument index="0" name="name" type="String">
 			</argument>
 			<description>
-				Sets the name of the [code]Node[/code]. Name must be unique within parent, and setting an already existing name will cause for the node to be automatically renamed.
+				Sets the name of the node. The name must be unique within the parent. Using an existing name will cause the node to be automatically renamed.
 			</description>
 		</method>
 		<method name="set_network_master">
@@ -690,7 +694,7 @@
 			<argument index="0" name="owner" type="Node">
 			</argument>
 			<description>
-				Sets the node owner. A node can have any other node as owner (as long as a valid parent, grandparent, etc ascending in the tree). When saving a node (using SceneSaver) all the nodes it owns will be saved with it. This allows to create complex SceneTrees, with instancing and subinstancing.
+				Sets the node owner. A node can have any other node as owner (as long as it is a valid parent, grandparent, etc. ascending in the tree). When saving a node (using SceneSaver) all the nodes it owns will be saved with it. This allows for the creation of complex [SceneTree]s, with instancing and subinstancing.
 			</description>
 		</method>
 		<method name="set_physics_process">
@@ -699,7 +703,7 @@
 			<argument index="0" name="enable" type="bool">
 			</argument>
 			<description>
-				Enables or disables the node's physics (alias fixed framerate) processing. When a node is being processed, it will receive a NOTIFICATION_PHYSICS_PROCESS at a fixed (usually 60 fps, check [OS] to change that) interval (and the [method _physics_process] callback will be called if exists). Enabled automatically if [method _physics_process] is overriden. Any calls to this before [method _ready] will be ignored.
+				Enables or disables physics (i.e. fixed framerate) processing. When a node is being processed, it will receive a NOTIFICATION_PHYSICS_PROCESS at a fixed (usually 60 fps, see [OS] to change) interval (and the [method _physics_process] callback will be called if it exists). Enabled automatically if [method _physics_process] is overridden. Any calls to this before [method _ready] will be ignored.
 			</description>
 		</method>
 		<method name="set_physics_process_internal">
@@ -716,7 +720,7 @@
 			<argument index="0" name="enable" type="bool">
 			</argument>
 			<description>
-				Enables or disables node processing. When a node is being processed, it will receive a NOTIFICATION_PROCESS on every drawn frame (and the [method _process] callback will be called if exists). Enabled automatically if [method _process] is overriden. Any calls to this before [method _ready] will be ignored.
+				Enables or disables processing. When a node is being processed, it will receive a NOTIFICATION_PROCESS on every drawn frame (and the [method _process] callback will be called if it exists). Enabled automatically if [method _process] is overridden. Any calls to this before [method _ready] will be ignored.
 			</description>
 		</method>
 		<method name="set_process_input">
@@ -725,7 +729,7 @@
 			<argument index="0" name="enable" type="bool">
 			</argument>
 			<description>
-				Enables input processing for node. This is not required for GUI controls! It hooks up the node to receive all input (see [method _input]). Enabled automatically if [method _input] is overriden. Any calls to this before [method _ready] will be ignored.
+				Enables or disables input processing. This is not required for GUI controls! Enabled automatically if [method _input] is overridden. Any calls to this before [method _ready] will be ignored.
 			</description>
 		</method>
 		<method name="set_process_internal">
@@ -742,7 +746,7 @@
 			<argument index="0" name="enable" type="bool">
 			</argument>
 			<description>
-				Enables unhandled input processing for node. This is not required for GUI controls! It hooks up the node to receive all input that was not previously handled before (usually by a [Control]). Enabled automatically if [method _unhandled_input] is overriden. Any calls to this before [method _ready] will be ignored.
+				Enables unhandled input processing. This is not required for GUI controls! It enables the node to receive all input that was not previously handled (usually by a [Control]). Enabled automatically if [method _unhandled_input] is overridden. Any calls to this before [method _ready] will be ignored.
 			</description>
 		</method>
 		<method name="set_process_unhandled_key_input">
@@ -751,7 +755,7 @@
 			<argument index="0" name="enable" type="bool">
 			</argument>
 			<description>
-				Enables unhandled key input processing for node. Enabled automatically if [method _unhandled_key_input] is overriden. Any calls to this before [method _ready] will be ignored.
+				Enables unhandled key input processing. Enabled automatically if [method _unhandled_key_input] is overridden. Any calls to this before [method _ready] will be ignored.
 			</description>
 		</method>
 		<method name="set_scene_instance_load_placeholder">
@@ -775,12 +779,12 @@
 		</signal>
 		<signal name="tree_entered">
 			<description>
-				Emitted when Node enters the tree.
+				Emitted when the node enters the tree.
 			</description>
 		</signal>
 		<signal name="tree_exited">
 			<description>
-				Emitted when Node exits the tree.
+				Emitted when the node exits the tree.
 			</description>
 		</signal>
 	</signals>
diff --git a/doc/classes/PhysicsServer.xml b/doc/classes/PhysicsServer.xml
index eb7c735277..534449a94b 100644
--- a/doc/classes/PhysicsServer.xml
+++ b/doc/classes/PhysicsServer.xml
@@ -351,15 +351,6 @@
 				Creates a physics body. The first parameter can be any value from constants BODY_MODE*, for the type of body created. Additionally, the body can be created in sleeping state to save processing time.
 			</description>
 		</method>
-		<method name="body_get_axis_lock" qualifiers="const">
-			<return type="int" enum="PhysicsServer.BodyAxisLock">
-			</return>
-			<argument index="0" name="body" type="RID">
-			</argument>
-			<description>
-				Gets the information, which Axis is locked if any. The can be any calue from the constants BODY_AXIS_LOCK*
-			</description>
-		</method>
 		<method name="body_get_collision_layer" qualifiers="const">
 			<return type="int">
 			</return>
@@ -485,6 +476,16 @@
 				Returns a body state.
 			</description>
 		</method>
+		<method name="body_is_axis_locked" qualifiers="const">
+			<return type="bool">
+			</return>
+			<argument index="0" name="body" type="RID">
+			</argument>
+			<argument index="1" name="axis" type="int" enum="PhysicsServer.BodyAxis">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="body_is_continuous_collision_detection_enabled" qualifiers="const">
 			<return type="bool">
 			</return>
@@ -540,10 +541,11 @@
 			</return>
 			<argument index="0" name="body" type="RID">
 			</argument>
-			<argument index="1" name="axis" type="int" enum="PhysicsServer.BodyAxisLock">
+			<argument index="1" name="axis" type="int" enum="PhysicsServer.BodyAxis">
+			</argument>
+			<argument index="2" name="lock" type="bool">
 			</argument>
 			<description>
-				Locks velocity along one axis to 0 and only allows rotation along this axis, can also be set to disabled which disables this functionality.
 			</description>
 		</method>
 		<method name="body_set_axis_velocity">
@@ -1519,17 +1521,17 @@
 		<constant name="SPACE_PARAM_CONSTRAINT_DEFAULT_BIAS" value="7" enum="SpaceParameter">
 			Constant to set/get the default solver bias for all physics constraints. A solver bias is a factor controlling how much two objects "rebound", after violating a constraint, to avoid leaving them in that state because of numerical imprecision.
 		</constant>
-		<constant name="BODY_AXIS_LOCK_DISABLED" value="0" enum="BodyAxisLock">
-			The [Body] can rotate and move freely.
+		<constant name="BODY_AXIS_LINEAR_X" value="1" enum="BodyAxis">
+		</constant>
+		<constant name="BODY_AXIS_LINEAR_Y" value="2" enum="BodyAxis">
+		</constant>
+		<constant name="BODY_AXIS_LINEAR_Z" value="4" enum="BodyAxis">
 		</constant>
-		<constant name="BODY_AXIS_LOCK_X" value="1" enum="BodyAxisLock">
-			The [Body] cannot move across x axis can only rotate across x axis.
+		<constant name="BODY_AXIS_ANGULAR_X" value="8" enum="BodyAxis">
 		</constant>
-		<constant name="BODY_AXIS_LOCK_Y" value="2" enum="BodyAxisLock">
-			The [Body] cannot move across y axis can only rotate across y axis.
+		<constant name="BODY_AXIS_ANGULAR_Y" value="16" enum="BodyAxis">
 		</constant>
-		<constant name="BODY_AXIS_LOCK_Z" value="3" enum="BodyAxisLock">
-			The [Body] cannot move across z axis can only rotate across z axis.
+		<constant name="BODY_AXIS_ANGULAR_Z" value="32" enum="BodyAxis">
 		</constant>
 	</constants>
 </class>
diff --git a/doc/classes/PopupMenu.xml b/doc/classes/PopupMenu.xml
index 7071d64f2e..372a97ecd9 100644
--- a/doc/classes/PopupMenu.xml
+++ b/doc/classes/PopupMenu.xml
@@ -233,6 +233,12 @@
 			<description>
 			</description>
 		</method>
+		<method name="is_hide_on_state_item_selection" qualifiers="const">
+			<return type="bool">
+			</return>
+			<description>
+			</description>
+		</method>
 		<method name="is_item_checkable" qualifiers="const">
 			<return type="bool">
 			</return>
@@ -278,6 +284,14 @@
 				Removes the item at index "idx" from the menu. Note that the indexes of items after the removed item are going to be shifted by one.
 			</description>
 		</method>
+		<method name="set_hide_on_state_item_selection">
+			<return type="void">
+			</return>
+			<argument index="0" name="enable" type="bool">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="set_item_accelerator">
 			<return type="void">
 			</return>
@@ -377,6 +391,16 @@
 			<description>
 			</description>
 		</method>
+		<method name="set_item_multistate">
+			<return type="void">
+			</return>
+			<argument index="0" name="idx" type="int">
+			</argument>
+			<argument index="1" name="state" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="set_item_submenu">
 			<return type="void">
 			</return>
@@ -417,6 +441,14 @@
 			<description>
 			</description>
 		</method>
+		<method name="toggle_item_multistate">
+			<return type="void">
+			</return>
+			<argument index="0" name="idx" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
 	</methods>
 	<members>
 		<member name="hide_on_checkable_item_selection" type="bool" setter="set_hide_on_checkable_item_selection" getter="is_hide_on_checkable_item_selection">
diff --git a/doc/classes/Rect2.xml b/doc/classes/Rect2.xml
index 1bd2e812e6..4fc9e5bd71 100644
--- a/doc/classes/Rect2.xml
+++ b/doc/classes/Rect2.xml
@@ -37,6 +37,12 @@
 				Constructs a [code]Rect2[/code] by x, y, width, and height.
 			</description>
 		</method>
+		<method name="abs">
+			<return type="Rect2">
+			</return>
+			<description>
+			</description>
+		</method>
 		<method name="clip">
 			<return type="Rect2">
 			</return>
diff --git a/doc/classes/RigidBody.xml b/doc/classes/RigidBody.xml
index cfcd0258e7..3c54f29c15 100644
--- a/doc/classes/RigidBody.xml
+++ b/doc/classes/RigidBody.xml
@@ -59,8 +59,17 @@
 		<member name="angular_velocity" type="Vector3" setter="set_angular_velocity" getter="get_angular_velocity">
 			RigidBody's rotational velocity.
 		</member>
-		<member name="axis_lock" type="int" setter="set_axis_lock" getter="get_axis_lock" enum="RigidBody.AxisLock">
-			Locks the rotational forces to a particular axis, preventing rotations on other axes.
+		<member name="axis_lock_angular_x" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_angular_y" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_angular_z" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_linear_x" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_linear_y" type="bool" setter="set_axis_lock" getter="get_axis_lock">
+		</member>
+		<member name="axis_lock_linear_z" type="bool" setter="set_axis_lock" getter="get_axis_lock">
 		</member>
 		<member name="bounce" type="float" setter="set_bounce" getter="get_bounce">
 			RigidBody's bounciness.
@@ -168,13 +177,5 @@
 		<constant name="MODE_KINEMATIC" value="3" enum="Mode">
 			Kinematic body. The body behaves like a [KinematicBody], and can only move by user code.
 		</constant>
-		<constant name="AXIS_LOCK_DISABLED" value="0" enum="AxisLock">
-		</constant>
-		<constant name="AXIS_LOCK_X" value="1" enum="AxisLock">
-		</constant>
-		<constant name="AXIS_LOCK_Y" value="2" enum="AxisLock">
-		</constant>
-		<constant name="AXIS_LOCK_Z" value="3" enum="AxisLock">
-		</constant>
 	</constants>
 </class>
diff --git a/doc/classes/SurfaceTool.xml b/doc/classes/SurfaceTool.xml
index 22099a930c..0916d0b928 100644
--- a/doc/classes/SurfaceTool.xml
+++ b/doc/classes/SurfaceTool.xml
@@ -170,6 +170,8 @@
 			</return>
 			<argument index="0" name="existing" type="ArrayMesh" default="null">
 			</argument>
+			<argument index="1" name="flags" type="int" default="97792">
+			</argument>
 			<description>
 				Returns a constructed [ArrayMesh] from current information passed in. If an existing [ArrayMesh] is passed in as an argument, will add an extra surface to the existing [ArrayMesh].
 			</description>
diff --git a/doc/classes/TextEdit.xml b/doc/classes/TextEdit.xml
index df48ba3e23..da30c4c7bd 100644
--- a/doc/classes/TextEdit.xml
+++ b/doc/classes/TextEdit.xml
@@ -305,6 +305,15 @@
 				Enable text wrapping when it goes beyond he edge of what is visible.
 			</description>
 		</method>
+		<method name="toggle_fold_line">
+			<return type="void">
+			</return>
+			<argument index="0" name="line" type="int">
+			</argument>
+			<description>
+				Toggle the folding of the code block at the given line.
+			</description>
+		</method>
 		<method name="undo">
 			<return type="void">
 			</return>
@@ -428,6 +437,8 @@
 		</theme_item>
 		<theme_item name="caret_color" type="Color">
 		</theme_item>
+		<theme_item name="code_folding_color" type="Color">
+		</theme_item>
 		<theme_item name="completion" type="StyleBox">
 		</theme_item>
 		<theme_item name="completion_background_color" type="Color">
diff --git a/doc/classes/Texture.xml b/doc/classes/Texture.xml
index 52972177bf..bf27bc4768 100644
--- a/doc/classes/Texture.xml
+++ b/doc/classes/Texture.xml
@@ -133,7 +133,7 @@
 			More effective on planes often shown going to the horrizon as those textures (Walls or Ground for example) get squashed in the viewport to different aspect ratios and regular mipmaps keep the aspect ratio so they don't optimize storage that well in those cases.
 		</constant>
 		<constant name="FLAG_CONVERT_TO_LINEAR" value="16" enum="Flags">
-			Converts texture to SRGB color space. 
+			Converts texture to SRGB color space.
 		</constant>
 		<constant name="FLAG_MIRRORED_REPEAT" value="32" enum="Flags">
 			Repeats texture with alternate sections mirrored.
diff --git a/doc/classes/TileMap.xml b/doc/classes/TileMap.xml
index e58ab3dd25..72cd56dc55 100644
--- a/doc/classes/TileMap.xml
+++ b/doc/classes/TileMap.xml
@@ -38,20 +38,6 @@
 				Return the tile index of the cell referenced by a Vector2.
 			</description>
 		</method>
-		<method name="get_center_x" qualifiers="const">
-			<return type="bool">
-			</return>
-			<description>
-				Return true if tiles are to be centered in x coordinate (by default this is false and they are drawn from upper left cell corner).
-			</description>
-		</method>
-		<method name="get_center_y" qualifiers="const">
-			<return type="bool">
-			</return>
-			<description>
-				Return true if tiles are to be centered in y coordinate (by default this is false and they are drawn from upper left cell corner).
-			</description>
-		</method>
 		<method name="get_collision_layer_bit" qualifiers="const">
 			<return type="bool">
 			</return>
@@ -176,24 +162,6 @@
 				Optionally, the tile can also be flipped over the X and Y axes or transposed.
 			</description>
 		</method>
-		<method name="set_center_x">
-			<return type="void">
-			</return>
-			<argument index="0" name="enable" type="bool">
-			</argument>
-			<description>
-				Set tiles to be centered in x coordinate. (by default this is false and they are drawn from upper left cell corner).
-			</description>
-		</method>
-		<method name="set_center_y">
-			<return type="void">
-			</return>
-			<argument index="0" name="enable" type="bool">
-			</argument>
-			<description>
-				Set tiles to be centered in y coordinate. (by default this is false and they are drawn from upper left cell corner).
-			</description>
-		</method>
 		<method name="set_collision_layer_bit">
 			<return type="void">
 			</return>
diff --git a/doc/classes/TileSet.xml b/doc/classes/TileSet.xml
index 6a147a9646..3dbf172c5a 100644
--- a/doc/classes/TileSet.xml
+++ b/doc/classes/TileSet.xml
@@ -13,7 +13,7 @@
 	</demos>
 	<methods>
 		<method name="_forward_subtile_selection" qualifiers="virtual">
-			<return type="void">
+			<return type="Vector2">
 			</return>
 			<argument index="0" name="autotile_id" type="int">
 			</argument>
@@ -27,7 +27,7 @@
 			</description>
 		</method>
 		<method name="_is_tile_bound" qualifiers="virtual">
-			<return type="void">
+			<return type="bool">
 			</return>
 			<argument index="0" name="drawn_id" type="int">
 			</argument>
diff --git a/doc/classes/VisualServer.xml b/doc/classes/VisualServer.xml
index 6627ba15d5..1a9dc3a669 100644
--- a/doc/classes/VisualServer.xml
+++ b/doc/classes/VisualServer.xml
@@ -647,7 +647,7 @@
 			<argument index="1" name="height" type="float">
 			</argument>
 			<description>
-				Sets a canvas light's height. 
+				Sets a canvas light's height.
 			</description>
 		</method>
 		<method name="canvas_light_set_item_cull_mask">
@@ -1495,7 +1495,7 @@
 			<return type="Array">
 			</return>
 			<description>
-				Returns a list of all the textures and their information. 
+				Returns a list of all the textures and their information.
 			</description>
 		</method>
 		<method name="texture_get_data" qualifiers="const">
@@ -2022,7 +2022,7 @@
 			More effective on planes often shown going to the horrizon as those textures (Walls or Ground for example) get squashed in the viewport to different aspect ratios and regular mipmaps keep the aspect ratio so they don't optimize storage that well in those cases.
 		</constant>
 		<constant name="TEXTURE_FLAG_CONVERT_TO_LINEAR" value="16" enum="TextureFlags">
-			Converts texture to SRGB color space. 
+			Converts texture to SRGB color space.
 		</constant>
 		<constant name="TEXTURE_FLAG_MIRRORED_REPEAT" value="32" enum="TextureFlags">
 			Repeat texture with alternate sections mirrored.
@@ -2397,4 +2397,4 @@
 		<constant name="FEATURE_MULTITHREADED" value="1" enum="Features">
 		</constant>
 	</constants>
-</class>
-\ No newline at end of file
+</class>
diff --git a/drivers/gles3/shader_compiler_gles3.cpp b/drivers/gles3/shader_compiler_gles3.cpp
index 05381da9b9..21102e8c25 100644
--- a/drivers/gles3/shader_compiler_gles3.cpp
+++ b/drivers/gles3/shader_compiler_gles3.cpp
@@ -230,7 +230,6 @@ void ShaderCompilerGLES3::_dump_function_deps(SL::ShaderNode *p_node, const Stri
 
 	for (Set<StringName>::Element *E = p_node->functions[fidx].uses_function.front(); E; E = E->next()) {
 
-		print_line(String(p_node->functions[fidx].name) + " uses function: " + String(E->get()));
 		if (added.has(E->get())) {
 			continue; //was added already
 		}
diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp
index 5a06a58b80..bc18707deb 100644
--- a/drivers/unix/os_unix.cpp
+++ b/drivers/unix/os_unix.cpp
@@ -141,42 +141,14 @@ void OS_Unix::alert(const String &p_alert, const String &p_title) {
 	fprintf(stderr, "ERROR: %s\n", p_alert.utf8().get_data());
 }
 
-static int has_data(FILE *p_fd, int timeout_usec = 0) {
-
-	fd_set readset;
-	int fd = fileno(p_fd);
-	FD_ZERO(&readset);
-	FD_SET(fd, &readset);
-	timeval time;
-	time.tv_sec = 0;
-	time.tv_usec = timeout_usec;
-	int res = 0; //select(fd + 1, &readset, NULL, NULL, &time);
-	return res > 0;
-};
-
 String OS_Unix::get_stdin_string(bool p_block) {
 
-	String ret;
 	if (p_block) {
 		char buff[1024];
-		ret = stdin_buf + fgets(buff, 1024, stdin);
+		String ret = stdin_buf + fgets(buff, 1024, stdin);
 		stdin_buf = "";
 		return ret;
-	};
-
-	while (has_data(stdin)) {
-
-		char ch;
-		read(fileno(stdin), &ch, 1);
-		if (ch == '\n') {
-			ret = stdin_buf;
-			stdin_buf = "";
-			return ret;
-		} else {
-			char str[2] = { ch, 0 };
-			stdin_buf += str;
-		};
-	};
+	}
 
 	return "";
 }
@@ -194,8 +166,6 @@ uint64_t OS_Unix::get_unix_time() const {
 uint64_t OS_Unix::get_system_time_secs() const {
 	struct timeval tv_now;
 	gettimeofday(&tv_now, NULL);
-	//localtime(&tv_now.tv_usec);
-	//localtime((const long *)&tv_now.tv_usec);
 	return uint64_t(tv_now.tv_sec);
 }
 
diff --git a/editor/editor_help.cpp b/editor/editor_help.cpp
index 8f427582ae..676b168371 100644
--- a/editor/editor_help.cpp
+++ b/editor/editor_help.cpp
@@ -202,8 +202,9 @@ class EditorHelpSearch::IncrementalSearch : public Reference {
 	}
 
 public:
-	IncrementalSearch(EditorHelpSearch *p_search, Tree *p_search_options, const String &p_term)
-		: search(p_search), search_options(p_search_options) {
+	IncrementalSearch(EditorHelpSearch *p_search, Tree *p_search_options, const String &p_term) :
+			search(p_search),
+			search_options(p_search_options) {
 
 		def_icon = search->get_icon("Node", "EditorIcons");
 		doc = EditorHelp::get_doc_data();
diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp
index 0252358a27..27ed53bb42 100644
--- a/editor/editor_node.cpp
+++ b/editor/editor_node.cpp
@@ -1884,7 +1884,10 @@ void EditorNode::_menu_option_confirm(int p_option, bool p_confirmed) {
 				save_confirmation->set_text(vformat(TTR("Save changes to '%s' before closing?"), scene_filename != "" ? scene_filename : "unsaved scene"));
 				save_confirmation->popup_centered_minsize();
 				break;
+			} else {
+				tab_closing = editor_data.get_edited_scene();
 			}
+
 		} // fallthrough
 		case SCENE_TAB_CLOSE:
 		case FILE_SAVE_SCENE: {
@@ -3366,7 +3369,6 @@ void EditorNode::register_editor_types() {
 	ClassDB::register_class<EditorResourceConversionPlugin>();
 	ClassDB::register_class<EditorSceneImporter>();
 
-
 	// FIXME: Is this stuff obsolete, or should it be ported to new APIs?
 	ClassDB::register_class<EditorScenePostImport>();
 	//ClassDB::register_type<EditorImportExport>();
diff --git a/editor/editor_plugin.h b/editor/editor_plugin.h
index 89a6d3d250..f45d1c1ecc 100644
--- a/editor/editor_plugin.h
+++ b/editor/editor_plugin.h
@@ -31,11 +31,11 @@
 #define EDITOR_PLUGIN_H
 
 #include "editor/import/editor_import_plugin.h"
+#include "editor/import/resource_importer_scene.h"
 #include "io/config_file.h"
 #include "scene/gui/tool_button.h"
 #include "scene/main/node.h"
 #include "scene/resources/texture.h"
-#include "editor/import/resource_importer_scene.h"
 #include "undo_redo.h"
 
 /**
diff --git a/editor/editor_run.cpp b/editor/editor_run.cpp
index 90cbabcc4f..09bfa0aff0 100644
--- a/editor/editor_run.cpp
+++ b/editor/editor_run.cpp
@@ -101,7 +101,14 @@ Error EditorRun::run(const String &p_scene, const String p_custom_args, const Li
 			args.push_back(itos(screen_rect.position.x) + "," + itos(screen_rect.position.y));
 		} break;
 		case 1: { // centered
-			Vector2 pos = screen_rect.position + ((screen_rect.size - desired_size) / 2).floor();
+			int display_scale = 1;
+#ifdef OSX_ENABLED
+			if (OS::get_singleton()->get_screen_dpi(screen) >= 192 && OS::get_singleton()->get_screen_size(screen).x > 2000) {
+				display_scale = 2;
+			}
+#endif
+
+			Vector2 pos = screen_rect.position + ((screen_rect.size / display_scale - desired_size) / 2).floor();
 			args.push_back("--position");
 			args.push_back(itos(pos.x) + "," + itos(pos.y));
 		} break;
diff --git a/editor/editor_themes.cpp b/editor/editor_themes.cpp
index 4661fcf668..152eda7d91 100644
--- a/editor/editor_themes.cpp
+++ b/editor/editor_themes.cpp
@@ -587,9 +587,7 @@ Ref<Theme> create_editor_theme(const Ref<Theme> p_theme) {
 
 	// Checkbox
 	Ref<StyleBoxFlat> sb_checkbox = style_menu->duplicate();
-	// HACK, in reality, the checkbox draws the text over the icon by default, so the margin compensates that.
-	const int cb_w = theme->get_icon("GuiChecked", "EditorIcons")->get_width() + default_margin_size;
-	sb_checkbox->set_default_margin(MARGIN_LEFT, cb_w * EDSCALE);
+	sb_checkbox->set_default_margin(MARGIN_LEFT, default_margin_size * EDSCALE);
 	sb_checkbox->set_default_margin(MARGIN_RIGHT, default_margin_size * EDSCALE);
 	sb_checkbox->set_default_margin(MARGIN_TOP, default_margin_size * EDSCALE);
 	sb_checkbox->set_default_margin(MARGIN_BOTTOM, default_margin_size * EDSCALE);
@@ -625,6 +623,9 @@ Ref<Theme> create_editor_theme(const Ref<Theme> p_theme) {
 	theme->set_icon("radio_checked", "PopupMenu", theme->get_icon("GuiChecked", "EditorIcons"));
 	theme->set_icon("radio_unchecked", "PopupMenu", theme->get_icon("GuiUnchecked", "EditorIcons"));
 	theme->set_icon("submenu", "PopupMenu", theme->get_icon("ArrowRight", "EditorIcons"));
+	theme->set_icon("visibility_hidden", "PopupMenu", theme->get_icon("GuiVisibilityHidden", "EditorIcons"));
+	theme->set_icon("visibility_visible", "PopupMenu", theme->get_icon("GuiVisibilityVisible", "EditorIcons"));
+	theme->set_icon("visibility_xray", "PopupMenu", theme->get_icon("GuiVisibilityXray", "EditorIcons"));
 	theme->set_constant("vseparation", "PopupMenu", (extra_spacing + default_margin_size) * EDSCALE);
 
 	// Tree & ItemList background
diff --git a/editor/icons/icon_GUI_visibility_hidden.svg b/editor/icons/icon_GUI_visibility_hidden.svg
new file mode 100644
index 0000000000..2add2e9eb8
--- /dev/null
+++ b/editor/icons/icon_GUI_visibility_hidden.svg
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="16"
+   height="16"
+   version="1.1"
+   viewBox="0 0 16 16"
+   id="svg2"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="icon_GUI_visibility_hidden.svg">
+  <metadata
+     id="metadata12">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs10" />
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1920"
+     inkscape:window-height="1027"
+     id="namedview8"
+     showgrid="false"
+     inkscape:zoom="14.75"
+     inkscape:cx="18.882384"
+     inkscape:cy="7.2939487"
+     inkscape:window-x="-8"
+     inkscape:window-y="-8"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="svg2" />
+  <path
+     style="color:#000000;text-indent:0;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;text-transform:none;white-space:normal;isolation:auto;mix-blend-mode:normal;solid-color:#000000;fill:#e0e0e0;fill-opacity:1;fill-rule:evenodd;color-rendering:auto;image-rendering:auto;shape-rendering:auto"
+     d="M 8.3320312 2.1328125 C 8.1166713 2.129146 7.900423 2.1368613 7.6855469 2.1542969 C 4.8418629 2.3850399 2.1034153 4.4237115 1.0449219 7.5722656 C 0.98765482 7.7577705 0.9856205 7.9559357 1.0390625 8.1425781 C 1.2458895 8.8664725 1.5352035 9.5092453 1.8730469 10.089844 L 12.501953 3.7890625 C 11.256805 2.6845102 9.797893 2.1577685 8.3320312 2.1328125 z M 14.554688 3.3046875 L 0.7421875 11.507812 L 1.4453125 12.695312 L 15.257812 4.4921875 L 14.554688 3.3046875 z M 14.169922 5.8847656 L 3.6171875 12.140625 C 4.9944165 13.294116 6.6188565 13.867188 8 13.867188 C 10.5 13.867188 13.836536 12.077978 14.960938 8.1425781 C 15.012856 7.9619931 15.012856 7.7704285 14.960938 7.5898438 C 14.731965 6.9583712 14.46336 6.3981967 14.169922 5.8847656 z "
+     id="path6" />
+</svg>
diff --git a/editor/icons/icon_GUI_visibility_visible.svg b/editor/icons/icon_GUI_visibility_visible.svg
new file mode 100644
index 0000000000..11ae563779
--- /dev/null
+++ b/editor/icons/icon_GUI_visibility_visible.svg
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="16"
+   height="16"
+   version="1.1"
+   viewBox="0 0 16 16"
+   id="svg2"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="icon_visibility_visible.svg">
+  <metadata
+     id="metadata12">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs10" />
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1920"
+     inkscape:window-height="1027"
+     id="namedview8"
+     showgrid="false"
+     inkscape:zoom="14.75"
+     inkscape:cx="15.823281"
+     inkscape:cy="12.108563"
+     inkscape:window-x="-8"
+     inkscape:window-y="-8"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="svg2" />
+  <g
+     transform="translate(0 -1036.4)"
+     id="g4"
+     style="fill:#e0e0e0;fill-opacity:1">
+    <path
+       transform="translate(0,1036.4)"
+       d="M 8,2 C 5.4433,2 2.2093,3.9477 1.0449,7.7051 c -0.0572671,0.1855049 -0.059303,0.3836676 -0.00586,0.57031 1.1244,3.9354 4.4609,5.7246 6.9609,5.7246 2.5000004,0 5.8365004,-1.7892 6.9609004,-5.7246 0.05192,-0.180585 0.05192,-0.372145 0,-0.55273 -1.1003,-3.7876 -4.4066,-5.7227 -6.9609004,-5.7227 z m 0,2 c 2.209139,0 4,1.790861 4,4 0,2.209139 -1.790861,4 -4,4 C 5.790861,12 4,10.209139 4,8 4,5.790861 5.790861,4 8,4 Z M 8,6 C 6.8954305,6 6,6.8954305 6,8 6,9.1045695 6.8954305,10 8,10 9.1045695,10 10,9.1045695 10,8 10,6.8954305 9.1045695,6 8,6 Z"
+       style="color:#000000;text-indent:0;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;text-transform:none;white-space:normal;isolation:auto;mix-blend-mode:normal;solid-color:#000000;fill:#e0e0e0;fill-opacity:1;fill-rule:evenodd;color-rendering:auto;image-rendering:auto;shape-rendering:auto"
+       id="path6"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="cccsccccssssssssss" />
+  </g>
+</svg>
diff --git a/editor/icons/icon_GUI_visibility_xray.svg b/editor/icons/icon_GUI_visibility_xray.svg
new file mode 100644
index 0000000000..1fd9fcf1b5
--- /dev/null
+++ b/editor/icons/icon_GUI_visibility_xray.svg
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="16"
+   height="16"
+   version="1.1"
+   viewBox="0 0 16 16"
+   id="svg2"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="icon_GUI_visibility_xray.svg">
+  <metadata
+     id="metadata12">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs10" />
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1920"
+     inkscape:window-height="1027"
+     id="namedview8"
+     showgrid="false"
+     inkscape:zoom="7.375"
+     inkscape:cx="43.019438"
+     inkscape:cy="-8.9853027"
+     inkscape:window-x="-8"
+     inkscape:window-y="-8"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="svg2" />
+  <g
+     transform="translate(0.20338214,-1036.671)"
+     id="g4"
+     style="fill:#e0e0e0;fill-opacity:1" />
+  <path
+     id="path4154"
+     style="opacity:1;fill:#e0e0e0;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1.42799997;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+     d="m 5.0107427,7.1191578 c -0.084872,0.2859445 -0.1282828,0.5825859 -0.128907,0.8808593 8.579e-4,0.263009 0.034983,0.5248532 0.101563,0.7792969 l 6.0312493,0 C 11.081887,8.5249547 11.116668,8.2631092 11.118164,8.0000171 11.11754,7.7017437 11.074129,7.4051023 10.989257,7.1191578 Z M 7.9999096,2.000005 c -2.5567,0 -5.7907,1.9477 -6.9551,5.7051 -0.057267,0.1855049 -0.059303,0.3836676 -0.00586,0.57031 1.1244,3.9354 4.4609,5.7246 6.9609,5.7246 2.4999994,0 5.8364994,-1.7892 6.9608994,-5.7246 0.05192,-0.180585 0.05192,-0.372145 0,-0.55273 -1.1003,-3.7876 -4.4066,-5.7227 -6.9608994,-5.7227 z m 0,2 c 2.2091384,0 3.9999994,1.790861 3.9999994,4 0,2.209139 -1.790861,4 -3.9999994,4 -2.209139,0 -4,-1.790861 -4,-4 0,-2.209139 1.790861,-4 4,-4 z"
+     inkscape:connector-curvature="0"
+     sodipodi:nodetypes="ccccccccccsccccsssss" />
+</svg>
diff --git a/editor/import/editor_import_collada.cpp b/editor/import/editor_import_collada.cpp
index c38391c71b..c4315f1b83 100644
--- a/editor/import/editor_import_collada.cpp
+++ b/editor/import/editor_import_collada.cpp
@@ -1979,7 +1979,7 @@ Node *EditorSceneImporterCollada::import_scene(const String &p_path, uint32_t p_
 	return state.scene;
 }
 
-Ref<Animation> EditorSceneImporterCollada::import_animation(const String &p_path, uint32_t p_flags,int p_bake_fps) {
+Ref<Animation> EditorSceneImporterCollada::import_animation(const String &p_path, uint32_t p_flags, int p_bake_fps) {
 
 	ColladaImport state;
 
diff --git a/editor/import/editor_import_collada.h b/editor/import/editor_import_collada.h
index 986b5b766f..904080c19b 100644
--- a/editor/import/editor_import_collada.h
+++ b/editor/import/editor_import_collada.h
@@ -40,7 +40,7 @@ public:
 	virtual uint32_t get_import_flags() const;
 	virtual void get_extensions(List<String> *r_extensions) const;
 	virtual Node *import_scene(const String &p_path, uint32_t p_flags, int p_bake_fps, List<String> *r_missing_deps = NULL, Error *r_err = NULL);
-	virtual Ref<Animation> import_animation(const String &p_path, uint32_t p_flags,int p_bake_fps);
+	virtual Ref<Animation> import_animation(const String &p_path, uint32_t p_flags, int p_bake_fps);
 
 	EditorSceneImporterCollada();
 };
diff --git a/editor/import/editor_scene_importer_gltf.cpp b/editor/import/editor_scene_importer_gltf.cpp
index e801f3e7c3..00eb69a568 100644
--- a/editor/import/editor_scene_importer_gltf.cpp
+++ b/editor/import/editor_scene_importer_gltf.cpp
@@ -1597,8 +1597,11 @@ void EditorSceneImporterGLTF::_generate_node(GLTFState &state, int p_node, Node
 	if (n->mesh >= 0) {
 		ERR_FAIL_INDEX(n->mesh, state.meshes.size());
 		MeshInstance *mi = memnew(MeshInstance);
-		const GLTFMesh &mesh = state.meshes[n->mesh];
+		GLTFMesh &mesh = state.meshes[n->mesh];
 		mi->set_mesh(mesh.mesh);
+		if (mesh.mesh->get_name() == "") {
+			mesh.mesh->set_name(n->name);
+		}
 		for (int i = 0; i < mesh.blend_weights.size(); i++) {
 			mi->set("blend_shapes/" + mesh.mesh->get_blend_shape_name(i), mesh.blend_weights[i]);
 		}
diff --git a/editor/import/editor_scene_importer_gltf.h b/editor/import/editor_scene_importer_gltf.h
index 0c8000427e..91c584a05a 100644
--- a/editor/import/editor_scene_importer_gltf.h
+++ b/editor/import/editor_scene_importer_gltf.h
@@ -296,7 +296,7 @@ public:
 	virtual uint32_t get_import_flags() const;
 	virtual void get_extensions(List<String> *r_extensions) const;
 	virtual Node *import_scene(const String &p_path, uint32_t p_flags, int p_bake_fps, List<String> *r_missing_deps = NULL, Error *r_err = NULL);
-	virtual Ref<Animation> import_animation(const String &p_path, uint32_t p_flags,int p_bake_fps);
+	virtual Ref<Animation> import_animation(const String &p_path, uint32_t p_flags, int p_bake_fps);
 
 	EditorSceneImporterGLTF();
 };
diff --git a/editor/import/resource_importer_obj.h b/editor/import/resource_importer_obj.h
index 09dc8ac8a1..e66ea47b69 100644
--- a/editor/import/resource_importer_obj.h
+++ b/editor/import/resource_importer_obj.h
@@ -40,7 +40,7 @@ public:
 	virtual uint32_t get_import_flags() const;
 	virtual void get_extensions(List<String> *r_extensions) const;
 	virtual Node *import_scene(const String &p_path, uint32_t p_flags, int p_bake_fps, List<String> *r_missing_deps, Error *r_err = NULL);
-	virtual Ref<Animation> import_animation(const String &p_path, uint32_t p_flags,int p_bake_fps);
+	virtual Ref<Animation> import_animation(const String &p_path, uint32_t p_flags, int p_bake_fps);
 
 	EditorOBJImporter();
 };
diff --git a/editor/plugins/asset_library_editor_plugin.cpp b/editor/plugins/asset_library_editor_plugin.cpp
index eee54f3cd2..3ab8f318a7 100644
--- a/editor/plugins/asset_library_editor_plugin.cpp
+++ b/editor/plugins/asset_library_editor_plugin.cpp
@@ -915,6 +915,11 @@ void EditorAssetLibrary::_search(int p_page) {
 	_api_request("asset", REQUESTING_SEARCH, args);
 }
 
+void EditorAssetLibrary::_search_text_entered(const String &p_text) {
+
+	_search();
+}
+
 HBoxContainer *EditorAssetLibrary::_make_pages(int p_page, int p_page_count, int p_page_len, int p_total_items, int p_current_items) {
 
 	HBoxContainer *hbc = memnew(HBoxContainer);
@@ -1280,6 +1285,7 @@ void EditorAssetLibrary::_bind_methods() {
 	ClassDB::bind_method("_select_category", &EditorAssetLibrary::_select_category);
 	ClassDB::bind_method("_image_request_completed", &EditorAssetLibrary::_image_request_completed);
 	ClassDB::bind_method("_search", &EditorAssetLibrary::_search, DEFVAL(0));
+	ClassDB::bind_method("_search_text_entered", &EditorAssetLibrary::_search_text_entered);
 	ClassDB::bind_method("_install_asset", &EditorAssetLibrary::_install_asset);
 	ClassDB::bind_method("_manage_plugins", &EditorAssetLibrary::_manage_plugins);
 	ClassDB::bind_method("_asset_open", &EditorAssetLibrary::_asset_open);
@@ -1309,7 +1315,7 @@ EditorAssetLibrary::EditorAssetLibrary(bool p_templates_only) {
 	filter = memnew(LineEdit);
 	search_hb->add_child(filter);
 	filter->set_h_size_flags(SIZE_EXPAND_FILL);
-	filter->connect("text_entered", this, "_search");
+	filter->connect("text_entered", this, "_search_text_entered");
 	search = memnew(Button(TTR("Search")));
 	search->connect("pressed", this, "_search");
 	search_hb->add_child(search);
diff --git a/editor/plugins/asset_library_editor_plugin.h b/editor/plugins/asset_library_editor_plugin.h
index 90d597e70a..5536fbb2ec 100644
--- a/editor/plugins/asset_library_editor_plugin.h
+++ b/editor/plugins/asset_library_editor_plugin.h
@@ -284,6 +284,7 @@ class EditorAssetLibrary : public PanelContainer {
 
 	void _search(int p_page = 0);
 	void _rerun_search(int p_ignore);
+	void _search_text_entered(const String &p_text = "");
 	void _api_request(const String &p_request, RequestType p_request_type, const String &p_arguments = "");
 	void _http_request_completed(int p_status, int p_code, const PoolStringArray &headers, const PoolByteArray &p_data);
 	void _http_download_completed(int p_status, int p_code, const PoolStringArray &headers, const PoolByteArray &p_data);
diff --git a/editor/plugins/canvas_item_editor_plugin.cpp b/editor/plugins/canvas_item_editor_plugin.cpp
index e63bc3ad9f..ad22c12372 100644
--- a/editor/plugins/canvas_item_editor_plugin.cpp
+++ b/editor/plugins/canvas_item_editor_plugin.cpp
@@ -4344,7 +4344,7 @@ CanvasItemEditor::CanvasItemEditor(EditorNode *p_editor) {
 	additive_selection = false;
 
 	// Update the menus checkboxes
-	set_state(get_state());
+	call_deferred("set_state", get_state());
 }
 
 CanvasItemEditor *CanvasItemEditor::singleton = NULL;
@@ -4730,6 +4730,7 @@ void CanvasItemEditorViewport::drop_data(const Point2 &p_point, const Variant &p
 			list.push_back(root_node);
 		} else {
 			drop_pos = p_point;
+			target_node = NULL;
 			_show_resource_type_selector();
 			return;
 		}
diff --git a/editor/plugins/spatial_editor_plugin.cpp b/editor/plugins/spatial_editor_plugin.cpp
index b855d2d4c4..cefc957ebf 100644
--- a/editor/plugins/spatial_editor_plugin.cpp
+++ b/editor/plugins/spatial_editor_plugin.cpp
@@ -2624,7 +2624,6 @@ void SpatialEditorViewport::_menu_option(int p_option) {
 			view_menu->get_popup()->set_item_checked(view_menu->get_popup()->get_item_index(VIEW_DISPLAY_WIREFRAME), false);
 			view_menu->get_popup()->set_item_checked(view_menu->get_popup()->get_item_index(VIEW_DISPLAY_OVERDRAW), false);
 			view_menu->get_popup()->set_item_checked(view_menu->get_popup()->get_item_index(VIEW_DISPLAY_SHADELESS), false);
-
 		} break;
 		case VIEW_DISPLAY_WIREFRAME: {
 
@@ -3756,6 +3755,10 @@ void SpatialEditor::select_gizmo_highlight_axis(int p_axis) {
 	}
 }
 
+int SpatialEditor::get_skeleton_visibility_state() const { // State of the MENU_VISIBILITY_SKELETON multistate item in view_menu's popup
+	return view_menu->get_popup()->get_item_state(view_menu->get_popup()->get_item_index(MENU_VISIBILITY_SKELETON));
+}
+
 void SpatialEditor::update_transform_gizmo() {
 
 	List<Node *> &selection = editor_selection->get_selected_node_list();
@@ -3800,6 +3803,21 @@ void SpatialEditor::update_transform_gizmo() {
 	}
 }
 
+// Calls update_gizmo() on every Spatial descendant of p_node (children visited last-to-first); p_node itself is not updated.
+static void _update_all_gizmos(Node *p_node) {
+	for (int i = p_node->get_child_count() - 1; 0 <= i; --i) {
+		Spatial *spatial_node = Object::cast_to<Spatial>(p_node->get_child(i));
+		if (spatial_node)
+			spatial_node->update_gizmo();
+		_update_all_gizmos(p_node->get_child(i));
+	}
+}
+
+// Runs the recursive gizmo update starting from the SceneTree root.
+void SpatialEditor::update_all_gizmos() {
+	_update_all_gizmos(SceneTree::get_singleton()->get_root());
+}
+
 Object *SpatialEditor::_get_editor_data(Object *p_what) {
 
 	Spatial *sp = Object::cast_to<Spatial>(p_what);
@@ -4247,6 +4265,28 @@ void SpatialEditor::_menu_item_pressed(int p_option) {
 
 			_refresh_menu_icons();
 		} break;
+		case MENU_VISIBILITY_SKELETON: {
+
+			const int idx = view_menu->get_popup()->get_item_index(MENU_VISIBILITY_SKELETON);
+			view_menu->get_popup()->toggle_item_multistate(idx);
+
+			// Change icon
+			const int state = view_menu->get_popup()->get_item_state(idx);
+			switch (state) {
+				case 0:
+					view_menu->get_popup()->set_item_icon(idx, view_menu->get_popup()->get_icon("visibility_hidden"));
+					break;
+				case 1:
+					view_menu->get_popup()->set_item_icon(idx, view_menu->get_popup()->get_icon("visibility_visible"));
+					break;
+				case 2:
+					view_menu->get_popup()->set_item_icon(idx, view_menu->get_popup()->get_icon("visibility_xray"));
+					break;
+			}
+
+			update_all_gizmos();
+
+		} break;
 	}
 }
 
@@ -4699,6 +4739,7 @@ void SpatialEditor::_notification(int p_what) {
 		view_menu->get_popup()->set_item_icon(view_menu->get_popup()->get_item_index(MENU_VIEW_USE_3_VIEWPORTS), get_icon("Panels3", "EditorIcons"));
 		view_menu->get_popup()->set_item_icon(view_menu->get_popup()->get_item_index(MENU_VIEW_USE_3_VIEWPORTS_ALT), get_icon("Panels3Alt", "EditorIcons"));
 		view_menu->get_popup()->set_item_icon(view_menu->get_popup()->get_item_index(MENU_VIEW_USE_4_VIEWPORTS), get_icon("Panels4", "EditorIcons"));
+		view_menu->get_popup()->set_item_icon(view_menu->get_popup()->get_item_index(MENU_VISIBILITY_SKELETON), view_menu->get_popup()->get_icon("visibility_visible"));
 
 		_menu_item_pressed(MENU_VIEW_USE_1_VIEWPORT);
 
@@ -5035,6 +5076,9 @@ SpatialEditor::SpatialEditor(EditorNode *p_editor) {
 	p->add_separator();
 	p->add_shortcut(ED_SHORTCUT("spatial_editor/settings", TTR("Settings")), MENU_VIEW_CAMERA_SETTINGS);
 
+	p->add_separator();
+	p->add_multistate_item(TTR("Skeleton Gizmo visibility"), 3, 1, MENU_VISIBILITY_SKELETON);
+
 	p->set_item_checked(p->get_item_index(MENU_VIEW_ORIGIN), true);
 	p->set_item_checked(p->get_item_index(MENU_VIEW_GRID), true);
 
diff --git a/editor/plugins/spatial_editor_plugin.h b/editor/plugins/spatial_editor_plugin.h
index 0c2571017b..8369a5de54 100644
--- a/editor/plugins/spatial_editor_plugin.h
+++ b/editor/plugins/spatial_editor_plugin.h
@@ -92,7 +92,7 @@ class SpatialEditorViewport : public Control {
 		VIEW_DISPLAY_NORMAL,
 		VIEW_DISPLAY_WIREFRAME,
 		VIEW_DISPLAY_OVERDRAW,
-		VIEW_DISPLAY_SHADELESS,
+		VIEW_DISPLAY_SHADELESS
 	};
 
 public:
@@ -488,7 +488,8 @@ private:
 		MENU_VIEW_GRID,
 		MENU_VIEW_CAMERA_SETTINGS,
 		MENU_LOCK_SELECTED,
-		MENU_UNLOCK_SELECTED
+		MENU_UNLOCK_SELECTED,
+		MENU_VISIBILITY_SKELETON
 	};
 
 	Button *tool_button[TOOL_MAX];
@@ -591,7 +592,10 @@ public:
 	Ref<ArrayMesh> get_scale_gizmo(int idx) const { return scale_gizmo[idx]; }
 	Ref<ArrayMesh> get_scale_plane_gizmo(int idx) const { return scale_plane_gizmo[idx]; }
 
+	int get_skeleton_visibility_state() const;
+
 	void update_transform_gizmo();
+	void update_all_gizmos();
 
 	void select_gizmo_highlight_axis(int p_axis);
 	void set_custom_camera(Node *p_camera) { custom_camera = p_camera; }
diff --git a/editor/plugins/tile_set_editor_plugin.cpp b/editor/plugins/tile_set_editor_plugin.cpp
index eb2faa1ab1..b8c57fd959 100644
--- a/editor/plugins/tile_set_editor_plugin.cpp
+++ b/editor/plugins/tile_set_editor_plugin.cpp
@@ -343,7 +343,7 @@ AutotileEditor::AutotileEditor(EditorNode *p_editor) {
 	split->add_child(property_editor);
 
 	helper = memnew(AutotileEditorHelper(this));
-	property_editor->edit(helper);
+	property_editor->call_deferred("edit", helper);
 
 	// Editor
 
diff --git a/editor/property_editor.cpp b/editor/property_editor.cpp
index 446a0ea35d..59acd9ded9 100644
--- a/editor/property_editor.cpp
+++ b/editor/property_editor.cpp
@@ -70,7 +70,7 @@ void EditorResourceConversionPlugin::_bind_methods() {
 	mi.name = "_handles";
 	mi.return_val = PropertyInfo(Variant::BOOL, "");
 
-	BIND_VMETHOD(MethodInfo(Variant::BOOL, "_converts_to"));
+	BIND_VMETHOD(MethodInfo(Variant::STRING, "_converts_to"));
 }
 
 String EditorResourceConversionPlugin::converts_to() const {
@@ -647,7 +647,7 @@ bool CustomPropertyEditor::edit(Object *p_owner, const String &p_name, Variant::
 					}
 				}
 
-				if (type)
+				if (type != Variant::NIL)
 					property_select->select_property_from_basic_type(type, v);
 
 				updating = false;
@@ -1939,6 +1939,7 @@ CustomPropertyEditor::CustomPropertyEditor() {
 	type_button->get_popup()->connect("id_pressed", this, "_type_create_selected");
 
 	menu = memnew(PopupMenu);
+	menu->set_pass_on_modal_close_click(false);
 	add_child(menu);
 	menu->connect("id_pressed", this, "_menu_option");
 
@@ -2802,13 +2803,12 @@ void PropertyEditor::update_tree() {
 			TreeItem *sep = tree->create_item(root);
 			current_category = sep;
 			String type = p.name;
-			//*
+
 			if (has_icon(type, "EditorIcons"))
 				sep->set_icon(0, get_icon(type, "EditorIcons"));
 			else
 				sep->set_icon(0, get_icon("Object", "EditorIcons"));
 
-			//*/
 			sep->set_text(0, type);
 			sep->set_expand_right(0, true);
 			sep->set_selectable(0, false);
@@ -2934,38 +2934,36 @@ void PropertyEditor::update_tree() {
 		}
 
 		if (use_doc_hints) {
-			StringName setter;
-			StringName type;
-			if (ClassDB::get_setter_and_type_for_property(obj->get_class_name(), p.name, type, setter)) {
 
-				String descr;
-				bool found = false;
-				Map<StringName, Map<StringName, String> >::Element *E = descr_cache.find(type);
-				if (E) {
+			StringName classname = obj->get_class_name();
+			StringName propname = p.name;
+			String descr;
+			bool found = false;
 
-					Map<StringName, String>::Element *F = E->get().find(setter);
-					if (F) {
-						found = true;
-						descr = F->get();
-					}
+			Map<StringName, Map<StringName, String> >::Element *E = descr_cache.find(classname);
+			if (E) {
+				Map<StringName, String>::Element *F = E->get().find(propname);
+				if (F) {
+					found = true;
+					descr = F->get();
 				}
-				if (!found) {
+			}
 
-					DocData *dd = EditorHelp::get_doc_data();
-					Map<String, DocData::ClassDoc>::Element *E = dd->class_list.find(type);
-					if (E) {
-						for (int i = 0; i < E->get().methods.size(); i++) {
-							if (E->get().methods[i].name == setter.operator String()) {
-								descr = E->get().methods[i].description.strip_edges().word_wrap(80);
-							}
+			if (!found) {
+				DocData *dd = EditorHelp::get_doc_data();
+				Map<String, DocData::ClassDoc>::Element *E = dd->class_list.find(classname);
+				if (E) {
+					for (int i = 0; i < E->get().properties.size(); i++) {
+						if (E->get().properties[i].name == propname.operator String()) {
+							descr = E->get().properties[i].description.strip_edges().word_wrap(80);
 						}
 					}
-
-					descr_cache[type][setter] = descr;
 				}
 
-				item->set_tooltip(0, TTR("Property:") + " " + p.name + "\n\n" + descr);
+				descr_cache[classname][propname] = descr;
 			}
+
+			item->set_tooltip(0, TTR("Property:") + " " + p.name + "\n\n" + descr);
 		}
 
 		Dictionary d;
@@ -4279,6 +4277,7 @@ PropertyEditor::PropertyEditor() {
 	set_physics_process(true);
 
 	custom_editor = memnew(CustomPropertyEditor);
+	custom_editor->set_pass_on_modal_close_click(false);
 	add_child(custom_editor);
 
 	tree->connect("custom_popup_edited", this, "_custom_editor_request");
@@ -4365,7 +4364,7 @@ class SectionedPropertyEditorFilter : public Object {
 			PropertyInfo pi = E->get();
 			int sp = pi.name.find("/");
 
-			if (pi.name == "resource_path" || pi.name == "resource_name" || pi.name.begins_with("script/")) //skip resource stuff
+			if (pi.name == "resource_path" || pi.name == "resource_name" || pi.name == "resource_local_to_scene" || pi.name.begins_with("script/")) //skip resource stuff
 				continue;
 
 			if (sp == -1) {
@@ -4515,7 +4514,7 @@ void SectionedPropertyEditor::update_category_list() {
 		else if (!(pi.usage & PROPERTY_USAGE_EDITOR))
 			continue;
 
-		if (pi.name.find(":") != -1 || pi.name == "script" || pi.name == "resource_name" || pi.name == "resource_path")
+		if (pi.name.find(":") != -1 || pi.name == "script" || pi.name == "resource_name" || pi.name == "resource_path" || pi.name == "resource_local_to_scene")
 			continue;
 
 		if (search_box && search_box->get_text() != String() && pi.name.findn(search_box->get_text()) == -1)
@@ -4533,6 +4532,7 @@ void SectionedPropertyEditor::update_category_list() {
 		for (int i = 0; i < sc; i++) {
 
 			TreeItem *parent = section_map[metasection];
+			parent->set_custom_bg_color(0, get_color("prop_subsection", "Editor"));
 
 			if (i > 0) {
 				metasection += "/" + sectionarr[i];
@@ -4586,7 +4586,7 @@ SectionedPropertyEditor::SectionedPropertyEditor() {
 	search_box = NULL;
 
 	VBoxContainer *left_vb = memnew(VBoxContainer);
-	left_vb->set_custom_minimum_size(Size2(160, 0) * EDSCALE);
+	left_vb->set_custom_minimum_size(Size2(170, 0) * EDSCALE);
 	add_child(left_vb);
 
 	sections = memnew(Tree);
diff --git a/editor/property_selector.cpp b/editor/property_selector.cpp
index 86de7c56e1..77b73e4635 100644
--- a/editor/property_selector.cpp
+++ b/editor/property_selector.cpp
@@ -363,23 +363,6 @@ void PropertySelector::_item_selected() {
 
 			at_class = ClassDB::get_parent_class(at_class);
 		}
-
-		if (text == String()) {
-
-			StringName setter;
-			StringName type;
-			if (ClassDB::get_setter_and_type_for_property(class_type, name, type, setter)) {
-				Map<String, DocData::ClassDoc>::Element *E = dd->class_list.find(type);
-				if (E) {
-					for (int i = 0; i < E->get().methods.size(); i++) {
-						if (E->get().methods[i].name == setter.operator String()) {
-							text = E->get().methods[i].description;
-						}
-					}
-				}
-			}
-		}
-
 	} else {
 
 		String at_class = class_type;
@@ -516,6 +499,7 @@ void PropertySelector::select_property_from_script(const Ref<Script> &p_script,
 	search_box->grab_focus();
 	_update_search();
 }
+
 void PropertySelector::select_property_from_basic_type(Variant::Type p_type, const String &p_current) {
 
 	ERR_FAIL_COND(p_type == Variant::NIL);
diff --git a/editor/scene_tree_editor.cpp b/editor/scene_tree_editor.cpp
index 2c0981ca30..25924212fd 100644
--- a/editor/scene_tree_editor.cpp
+++ b/editor/scene_tree_editor.cpp
@@ -484,7 +484,8 @@ void SceneTreeEditor::_selected_changed() {
 void SceneTreeEditor::_deselect_items() {
 
 	// Clear currently elected items in scene tree dock.
-	editor_selection->clear();
+	if (editor_selection)
+		editor_selection->clear();
 }
 
 void SceneTreeEditor::_cell_multi_selected(Object *p_object, int p_cell, bool p_selected) {
diff --git a/editor/spatial_editor_gizmos.cpp b/editor/spatial_editor_gizmos.cpp
index 3ffc61cb45..f785b3e198 100644
--- a/editor/spatial_editor_gizmos.cpp
+++ b/editor/spatial_editor_gizmos.cpp
@@ -1316,6 +1316,34 @@ void SkeletonSpatialGizmo::redraw() {
 
 	Color gizmo_color = EDITOR_GET("editors/3d_gizmos/gizmo_colors/skeleton");
 	Ref<Material> material = create_material("skeleton_material", gizmo_color);
+	SpatialMaterial *sm = Object::cast_to<SpatialMaterial>(material.ptr());
+	// sm may be NULL if the cast fails, so every access (including the alpha reset) stays inside the guard.
+	if (sm) {
+		{ // Reset alpha to opaque before applying the selected visibility state
+			Color c(sm->get_albedo());
+			c.a = 1;
+			sm->set_albedo(c);
+		}
+		switch (SpatialEditor::get_singleton()->get_skeleton_visibility_state()) {
+			case 0: {
+				// Hidden
+				Color c(sm->get_albedo());
+				c.a = 0;
+				sm->set_albedo(c);
+				sm->set_feature(SpatialMaterial::FEATURE_TRANSPARENT, true);
+			} break;
+			case 1:
+				// Visible
+				sm->set_feature(SpatialMaterial::FEATURE_TRANSPARENT, false);
+				sm->set_render_priority(SpatialMaterial::RENDER_PRIORITY_MIN);
+				sm->set_flag(SpatialMaterial::FLAG_DISABLE_DEPTH_TEST, false);
+				break;
+			case 2:
+				// x-ray
+				sm->set_on_top_of_alpha();
+				break;
+		}
+	}
 
 	Ref<SurfaceTool> surface_tool(memnew(SurfaceTool));
 
@@ -2943,10 +2971,10 @@ void SliderJointSpatialGizmo::redraw() {
 
 	float ll = p3d->get_param(SliderJoint::PARAM_ANGULAR_LIMIT_LOWER);
 	float ul = p3d->get_param(SliderJoint::PARAM_ANGULAR_LIMIT_UPPER);
-	float lll = -p3d->get_param(SliderJoint::PARAM_LINEAR_LIMIT_LOWER);
-	float lul = -p3d->get_param(SliderJoint::PARAM_LINEAR_LIMIT_UPPER);
+	float lll = p3d->get_param(SliderJoint::PARAM_LINEAR_LIMIT_LOWER);
+	float lul = p3d->get_param(SliderJoint::PARAM_LINEAR_LIMIT_UPPER);
 
-	if (lll > lul) {
+	if (lll <= lul) {
 
 		cursor_points.push_back(Vector3(lul, 0, 0));
 		cursor_points.push_back(Vector3(lll, 0, 0));
@@ -3139,8 +3167,8 @@ void Generic6DOFJointSpatialGizmo::redraw() {
 			case 0:
 				ll = p3d->get_param_x(Generic6DOFJoint::PARAM_ANGULAR_LOWER_LIMIT);
 				ul = p3d->get_param_x(Generic6DOFJoint::PARAM_ANGULAR_UPPER_LIMIT);
-				lll = -p3d->get_param_x(Generic6DOFJoint::PARAM_LINEAR_LOWER_LIMIT);
-				lul = -p3d->get_param_x(Generic6DOFJoint::PARAM_LINEAR_UPPER_LIMIT);
+				lll = p3d->get_param_x(Generic6DOFJoint::PARAM_LINEAR_LOWER_LIMIT);
+				lul = p3d->get_param_x(Generic6DOFJoint::PARAM_LINEAR_UPPER_LIMIT);
 				enable_ang = p3d->get_flag_x(Generic6DOFJoint::FLAG_ENABLE_ANGULAR_LIMIT);
 				enable_lin = p3d->get_flag_x(Generic6DOFJoint::FLAG_ENABLE_LINEAR_LIMIT);
 				a1 = 0;
@@ -3150,25 +3178,26 @@ void Generic6DOFJointSpatialGizmo::redraw() {
 			case 1:
 				ll = p3d->get_param_y(Generic6DOFJoint::PARAM_ANGULAR_LOWER_LIMIT);
 				ul = p3d->get_param_y(Generic6DOFJoint::PARAM_ANGULAR_UPPER_LIMIT);
-				lll = -p3d->get_param_y(Generic6DOFJoint::PARAM_LINEAR_LOWER_LIMIT);
-				lul = -p3d->get_param_y(Generic6DOFJoint::PARAM_LINEAR_UPPER_LIMIT);
+				lll = p3d->get_param_y(Generic6DOFJoint::PARAM_LINEAR_LOWER_LIMIT);
+				lul = p3d->get_param_y(Generic6DOFJoint::PARAM_LINEAR_UPPER_LIMIT);
 				enable_ang = p3d->get_flag_y(Generic6DOFJoint::FLAG_ENABLE_ANGULAR_LIMIT);
 				enable_lin = p3d->get_flag_y(Generic6DOFJoint::FLAG_ENABLE_LINEAR_LIMIT);
-				a1 = 2;
-				a2 = 0;
-				a3 = 1;
+
+				a1 = 1;
+				a2 = 2;
+				a3 = 0;
 				break;
 			case 2:
 				ll = p3d->get_param_z(Generic6DOFJoint::PARAM_ANGULAR_LOWER_LIMIT);
 				ul = p3d->get_param_z(Generic6DOFJoint::PARAM_ANGULAR_UPPER_LIMIT);
-				lll = -p3d->get_param_z(Generic6DOFJoint::PARAM_LINEAR_LOWER_LIMIT);
-				lul = -p3d->get_param_z(Generic6DOFJoint::PARAM_LINEAR_UPPER_LIMIT);
+				lll = p3d->get_param_z(Generic6DOFJoint::PARAM_LINEAR_LOWER_LIMIT);
+				lul = p3d->get_param_z(Generic6DOFJoint::PARAM_LINEAR_UPPER_LIMIT);
 				enable_ang = p3d->get_flag_z(Generic6DOFJoint::FLAG_ENABLE_ANGULAR_LIMIT);
 				enable_lin = p3d->get_flag_z(Generic6DOFJoint::FLAG_ENABLE_LINEAR_LIMIT);
 
-				a1 = 1;
-				a2 = 2;
-				a3 = 0;
+				a1 = 2;
+				a2 = 0;
+				a3 = 1;
 				break;
 		}
 
@@ -3190,7 +3219,7 @@ void Generic6DOFJointSpatialGizmo::redraw() {
 		what = v;              \
 	}
 
-		if (enable_lin && lll >= lul) {
+		if (enable_lin && lll <= lul) {
 
 			ADD_VTX(lul, 0, 0);
 			ADD_VTX(lll, 0, 0);
diff --git a/main/input_default.cpp b/main/input_default.cpp
index 7cc7521686..8c91a1a5de 100644
--- a/main/input_default.cpp
+++ b/main/input_default.cpp
@@ -540,20 +540,62 @@ static const char *s_ControllerMappings[] = {
 	"00f00300000000000000504944564944,RetroUSB.com RetroPad,a:b1,b:b5,x:b0,y:b4,back:b2,start:b3,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"00f0f100000000000000504944564944,RetroUSB.com Super RetroPort,a:b1,b:b5,x:b0,y:b4,back:b2,start:b3,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"02200090000000000000504944564944,8Bitdo NES30 PRO USB,a:b0,b:b1,x:b3,y:b4,leftshoulder:b6,rightshoulder:b7,lefttrigger:b8,righttrigger:b9,back:b10,start:b11,leftstick:b13,rightstick:b14,leftx:a0,lefty:a1,rightx:a3,righty:a4,dpup:h0.1,dpright:h0.2,dpdown:h0.4,dpleft:h0.8,",
+	"03000000380700008081000000000000,MADCATZ SFV Arcade FightStick Alpha PS4,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b8,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:b6,righttrigger:b7,",
+	"0d0f1100000000000000504944564944,REAL ARCADE PRO.3,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b6,righttrigger:b7,",
+	"0d0f2200000000000000504944564944,REAL ARCADE Pro.V3,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
+	"0d0f2700000000000000504944564944,FIGHTING STICK V3,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b6,righttrigger:b7,",
+	"0d0f4000000000000000504944564944,Hori Fighting Stick Mini 3,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b5,rightshoulder:b7,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b4,righttrigger:b6,",
 	"0d0f4900000000000000504944564944,Hatsune Miku Sho Controller,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"0d0f4d00000000000000504944564944,HORIPAD3 A,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"0d0f5e00000000000000504944564944,Fighting Commander 4,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b5,rightshoulder:b4,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a4,righttrigger:a3,",
+	"0d0f5f00000000000000504944564944,Fighting Commander 4,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"0d0f6a00000000000000504944564944,Real Arcade Pro.4,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:a3,rightshoulder:b5,righttrigger:a4,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a5,",
+	"0d0f6b00000000000000504944564944,Real Arcade Pro.4,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
 	"0d0f6e00000000000000504944564944,HORIPAD 4,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"0d0f7000000000000000504944564944,REAL ARCADE PRO.4 VLX,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b6,righttrigger:b7,",
+	"0d0f8400000000000000504944564944,Fighting Commander 5,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:b6,righttrigger:b7,",
+	"0d0f8500000000000000504944564944,Fighting Commander 2016 PS3,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"0d0f8700000000000000504944564944,Fighting Stick mini 4,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b6,righttrigger:b7,",
+	"0d0f8800000000000000504944564944,Fighting Stick mini 4,a:b1,b:b2,x:b0,y:b3,back:b9,guide:b12,start:b8,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b6,righttrigger:b7,",
+	"0d0f8a00000000000000504944564944,Real Arcade Pro.4,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:a3,rightshoulder:b5,righttrigger:a4,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a5,",
+	"0d0f8b00000000000000504944564944,Real Arcade Pro.4,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
+	"10008200000000000000504944564944,PS360+ v1.66,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:h0.4,lefttrigger:b6,righttrigger:b7,",
 	"10080100000000000000504944564944,PS1 USB,a:b2,b:b1,x:b3,y:b0,back:b8,start:b9,leftshoulder:b6,rightshoulder:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a2,lefttrigger:b4,righttrigger:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,",
 	"100801e5000000000000504944564944,NEXT Classic USB Game Controller,a:b0,b:b1,back:b8,start:b9,rightx:a2,righty:a3,leftx:a0,lefty:a1,",
 	"10080300000000000000504944564944,PS2 USB,a:b2,b:b1,y:b0,x:b3,start:b9,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a4,righty:a2,lefttrigger:b4,righttrigger:b5,",
 	"10280900000000000000504944564944,8Bitdo SFC30 GamePad,a:b1,b:b0,y:b3,x:b4,start:b11,back:b10,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,",
+	"120cf60e000000000000504944564944,P4 Wired Gamepad,a:b1,b:b2,x:b0,y:b3,back:b12,guide:b8,start:b9,leftshoulder:b5,rightshoulder:b4,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:h0.0,lefttrigger:b7,righttrigger:b6,",
 	"20380900000000000000504944564944,8Bitdo NES30 PRO Wireless,a:b0,b:b1,x:b3,y:b4,leftshoulder:b6,rightshoulder:b7,lefttrigger:b8,righttrigger:b9,back:b10,start:b11,leftstick:b13,rightstick:b14,leftx:a0,lefty:a1,rightx:a3,righty:a4,dpup:h0.1,dpright:h0.2,dpdown:h0.4,dpleft:h0.8,",
+	"222c0020000000000000504944564944,QANBA DRONE ARCADE JOYSTICK,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:a3,righttrigger:a4,",
+	"222c0023000000000000504944564944,Qanba Obsidian Arcade Joystick PS4 Mode,a:b1,b:b2,x:b0,y:b3,back:b13,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a3,righttrigger:a4,",
+	"222c0223000000000000504944564944,Qanba Obsidian Arcade Joystick PS3 Mode,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"25090017000000000000504944564944,HRAP2 on PS/SS/N64 Joypad to USB BOX,a:b2,b:b1,x:b3,y:b0,back:b9,start:b8,leftshoulder:b5,rightshoulder:b7,leftx:a0,lefty:a1,lefttrigger:b4,righttrigger:b6,",
+	"25090128000000000000504944564944,Mayflash Arcade Stick,a:b1,b:b2,x:b5,y:b6,back:b8,start:b9,leftshoulder:b0,rightshoulder:b3,leftx:a0,lefty:a1,rightx:h0.4,righty:h0.0,lefttrigger:b4,righttrigger:b7,",
 	"25090500000000000000504944564944,PS3 DualShock,a:b2,b:b1,back:b9,dpdown:h0.8,dpleft:h0.4,dpright:h0.2,dpup:h0.1,guide:,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a2,righty:a3,start:b8,x:b0,y:b3,",
 	"2509e803000000000000504944564944,Mayflash Wii Classic Controller,a:b1,b:b0,x:b3,y:b2,back:b8,guide:b10,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:b11,dpdown:b13,dpleft:b12,dpright:b14,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
 	"28040140000000000000504944564944,GamePad Pro USB,a:b1,b:b2,x:b0,y:b3,back:b8,start:b9,leftshoulder:b4,rightshoulder:b5,leftx:a0,lefty:a1,lefttrigger:b6,righttrigger:b7,",
+	"300f0011000000000000504944564944,QanBa Arcade JoyStick 1008,x:b0,a:b1,b:b2,y:b3,back:b8,start:b10,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftx:a0,lefty:a1,",
 	"300f1001000000000000504944564944,Saitek P480 Rumble Pad,a:b2,b:b3,x:b0,y:b1,back:b8,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b6,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a2,lefttrigger:b5,righttrigger:b7,",
+	"300f1201000000000000504944564944,Saitek Dual Analog Pad,a:b2,b:b3,x:b0,y:b1,back:b8,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b6,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a2,lefttrigger:b5,righttrigger:b7,",
+	"300f1210000000000000504944564944,QanBa Joystick Plus,a:b0,b:b1,x:b2,y:b3,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,",
+	"300f1611000000000000504944564944,QanBa Arcade JoyStick 4018,a:b1,b:b2,x:b0,y:b3,back:b10,guide:b9,start:b8,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b6,righttrigger:b7,",
+	"341a0104000000000000504944564944,QanBa Joystick Q4RAF,a:b5,b:b6,x:b1,y:b2,back:b8,guide:b10,start:b9,leftshoulder:b0,rightshoulder:b3,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,lefttrigger:b4,righttrigger:b7,",
 	"341a0108000000000000504944564944,EXEQ RF USB Gamepad 8206,a:b0,b:b1,x:b2,y:b3,leftshoulder:b4,rightshoulder:b5,leftstick:b8,rightstick:b7,back:b8,start:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
 	"341a3608000000000000504944564944,Afterglow PS3 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,",
 	"36280100000000000000504944564944,OUYA Controller,a:b0,b:b3,y:b2,x:b1,start:b14,guide:b15,leftstick:b6,rightstick:b7,leftshoulder:b4,rightshoulder:b5,dpup:b8,dpleft:b10,dpdown:b9,dpright:b11,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:b12,righttrigger:b13,",
+	"38071888000000000000504944564944,MadCatz SFIV FightStick PS3,a:b0,b:b1,x:b2,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b5,rightshoulder:b4,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b7,righttrigger:b6,",
+	"38073888000000000000504944564944,Madcatz Arcade Fightstick TE S+ PS3,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"38075032000000000000504944564944,Mad Catz FightPad PRO PS3,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
+	"38075082000000000000504944564944,Mad Catz FightPad PRO PS4,x:b0,a:b1,b:b2,y:b3,back:b13,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:a3,rightshoulder:b5,righttrigger:a4,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a5,",
+	"38076652000000000000504944564944,UnKnown,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
+	"38078034000000000000504944564944,Mad Catz TE2 PS3 Fightstick,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"38078084000000000000504944564944,Mad Catz TE2 PS4 Fightstick,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a3,righttrigger:a4,",
+	"38078134000000000000504944564944,Mad Catz FightStick TE2+ PS3,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b7,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b4,",
+	"38078184000000000000504944564944,Mad Catz FightStick TE2+ PS4,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b5,rightshoulder:b4,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a4,righttrigger:b7,",
+	"38078433000000000000504944564944,Mad Catz FightStick TE S+ PS3,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b6,righttrigger:b7,",
+	"38078483000000000000504944564944,Mad Catz FightStick TE S+ PS4,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a3,righttrigger:b6,",
+	"38078532000000000000504944564944,Madcatz Arcade Fightstick TE S PS3,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"45130010000000000000504944564944,Generic USB Joystick,a:b0,b:b1,x:b2,y:b3,back:b8,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
 	"49190204000000000000504944564944,Ipega PG-9023,a:b0,b:b1,x:b3,y:b4,back:b10,start:b11,leftstick:b13,rightstick:b14,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:b8,righttrigger:b9,",
 	"4b12014d000000000000504944564944,NYKO AIRFLO,a:b0,b:b1,x:b2,y:b3,back:b8,guide:b10,start:b9,leftstick:a0,rightstick:a2,leftshoulder:a3,rightshoulder:b5,dpup:h0.1,dpdown:h0.0,dpleft:h0.8,dpright:h0.2,leftx:h0.6,lefty:h0.12,rightx:h0.9,righty:h0.4,lefttrigger:b6,righttrigger:b7,",
 	"4c056802000000000000504944564944,PS3 Controller,a:b14,b:b13,back:b0,dpdown:b6,dpleft:b7,dpright:b5,dpup:b4,guide:b16,leftshoulder:b10,leftstick:b1,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b11,rightstick:b2,righttrigger:b9,rightx:a2,righty:a3,start:b3,x:b15,y:b12,",
@@ -563,28 +605,37 @@ static const char *s_ControllerMappings[] = {
 	"4f0400b3000000000000504944564944,Thrustmaster Firestorm Dual Power,a:b0,b:b2,y:b3,x:b1,start:b10,guide:b8,back:b9,leftstick:b11,rightstick:b12,leftshoulder:b4,rightshoulder:b6,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b5,righttrigger:b7,",
 	"4f0415b3000000000000504944564944,Thrustmaster Dual Analog 3.2,x:b1,a:b0,b:b2,y:b3,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b5,rightshoulder:b6,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
 	"4f0423b3000000000000504944564944,Dual Trigger 3-in-1,a:b1,b:b2,x:b0,y:b3,back:b8,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:b6,righttrigger:b7,",
+	"5e048e02000000000000504944564944,Controller (XBOX 360 For Windows),x:b2,a:b0,b:b1,y:b3,back:b6,start:b7,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,rightshoulder:b5,righttrigger:a2,leftstick:b8,rightstick:b9,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
 	"63252305000000000000504944564944,USB Vibration Joystick (BM),x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
+	"66660488000000000000504944564944,TigerGame PS/PS2 Game Controller Adapter,a:b2,b:b1,x:b3,y:b0,back:b9,start:b8,leftstick:b10,rightstick:b11,leftshoulder:b6,rightshoulder:b7,dpup:b12,dpdown:b14,dpleft:b15,dpright:b13,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b4,righttrigger:b5,",
 	"6d0416c2000000000000504944564944,Generic DirectInput Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,",
 	"6d0418c2000000000000504944564944,Logitech RumblePad 2 USB,x:b0,a:b1,b:b2,y:b3,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
 	"6d0419c2000000000000504944564944,Logitech F710 Gamepad,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,",
 	"6f0e1e01000000000000504944564944,Rock Candy Gamepad for PS3,a:b1,b:b2,x:b0,y:b3,back:b8,start:b9,guide:b12,leftshoulder:b4,rightshoulder:b5,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,",
+	"6f0e2401000000000000504944564944,INJUSTICE FightStick for PS3,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,lefttrigger:b6,righttrigger:b7,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2",
 	"79000018000000000000504944564944,Mayflash WiiU Pro Game Controller Adapter (DInput),a:b1,b:b2,x:b0,y:b3,back:b8,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
 	"79000600000000000000504944564944,G-Shark GS-GP702,a:b2,b:b1,x:b3,y:b0,back:b8,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a4,lefttrigger:b6,righttrigger:b7,",
-	"79000600000000000000504944564944,NGS Phantom,a:b2,b:b3,y:b1,x:b0,start:b9,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a4,lefttrigger:b6,righttrigger:b7,",
+	"79001100000000000000504944564944,Sega Saturn Gamepad,a:b1,b:b2,x:b4,y:b5,start:b8,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a4,lefttrigger:b3,righttrigger:b0,",
+	"79001b18000000000000504944564944,Venom Arcade Joystick,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b6,righttrigger:b7,",
 	"79004318000000000000504944564944,Mayflash GameCube Controller Adapter,a:b1,b:b2,x:b0,y:b3,back:b0,start:b9,guide:b0,leftshoulder:b4,rightshoulder:b7,leftstick:b0,rightstick:b0,leftx:a0,lefty:a1,rightx:a5,righty:a2,lefttrigger:a3,righttrigger:a4,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,",
-	"79000600000000000000504944564944,Generic Speedlink,a:b2,b:b1,y:b0,x:b3,start:b9,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a4,lefttrigger:b6,righttrigger:b7,",
 	"83056020000000000000504944564944,iBuffalo USB 2-axis 8-button Gamepad,a:b1,b:b0,y:b2,x:b3,start:b7,back:b6,leftshoulder:b4,rightshoulder:b5,leftx:a0,lefty:a1,",
 	"88880803000000000000504944564944,PS3 Controller,a:b2,b:b1,back:b8,dpdown:h0.8,dpleft:h0.4,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b9,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:b7,rightx:a3,righty:a4,start:b11,x:b0,y:b3,",
 	"8f0e0300000000000000504944564944,Piranha xtreme,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a2,",
 	"8f0e0d31000000000000504944564944,Multilaser JS071 USB,a:b1,b:b2,y:b3,x:b0,start:b9,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"8f0e1030000000000000504944564944,Mayflash USB Adapter for original Sega Saturn controller,a:b0,b:b1,x:b3,y:b4,start:b9,leftshoulder:b6,rightshoulder:b2,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b5,righttrigger:b7,",
 	"8f0e1200000000000000504944564944,Acme,x:b2,a:b0,b:b1,y:b3,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b5,rightshoulder:b6,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a2,",
 	"9000318000000000000504944564944,Mayflash Wiimote PC Adapter,a:b2,b:h0.4,x:b0,y:b1,back:b4,start:b5,guide:b11,leftshoulder:b6,rightshoulder:b3,leftx:a0,lefty:a1,",
+	"9b280500000000000000504944564944,Saturn_Adapter_2.0,a:b1,b:b2,x:b0,y:b3,start:b9,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,lefttrigger:b4,righttrigger:b5,",
 	"a3060cff000000000000504944564944,Saitek P2500,a:b2,b:b3,y:b1,x:b0,start:b4,guide:b10,back:b5,leftstick:b8,rightstick:b9,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
+	"bd1215d0000000000000504944564944,Nintendo Retrolink USB Super SNES Classic Controller,y:b0,b:b1,a:b2,x:b3,leftshoulder:b4,rightshoulder:b5,start:b9,back:b8,leftx:a0,lefty:a1,",
 	"c0111352000000000000504944564944,Battalife Joystick,x:b4,a:b6,b:b7,y:b5,back:b2,start:b3,leftshoulder:b0,rightshoulder:b1,leftx:a0,lefty:a1,",
 	"c911f055000000000000504944564944,GAMEPAD,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,",
 	"d6206dca000000000000504944564944,PowerA Pro Ex,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.0,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"d8140862000000000000504944564944,HitBox Edition Cthulhu+,a:b1,b:b2,x:b0,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b5,rightshoulder:b7,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,lefttrigger:b4,righttrigger:b6,",
+	"d81d0b00000000000000504944564944,BUFFALO BSGP1601 Series ,x:b4,a:b5,b:b3,y:b2,back:b12,start:b13,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b8,lefttrigger:b6,rightshoulder:b9,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
+	"d81d0f00000000000000504944564944,iBUFFALO BSGP1204 Series,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
+	"d81d1000000000000000504944564944,iBUFFALO BSGP1204P Series,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
 	"ff113133000000000000504944564944,Gembird JPD-DualForce,a:b2,b:b3,x:b0,y:b1,start:b9,back:b8,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a4,lefttrigger:b6,righttrigger:b7,leftstick:b10,rightstick:b11,",
-	"ff113133000000000000504944564944,SVEN X-PAD,a:b2,b:b3,y:b1,x:b0,start:b5,back:b4,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a4,lefttrigger:b8,righttrigger:b9,",
 	"ffff0000000000000000504944564944,GameStop Gamepad,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,",
 	"__XINPUT_DEVICE__,XInput Gamepad,a:b12,b:b13,x:b14,y:b15,start:b4,back:b5,leftstick:b6,rightstick:b7,leftshoulder:b8,rightshoulder:b9,dpup:b0,dpdown:b1,dpleft:b2,dpright:b3,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:a4,righttrigger:a5,",
 #endif
@@ -595,10 +646,12 @@ static const char *s_ControllerMappings[] = {
 	"050000005769696d6f74652028313800,Wii U Pro Controller,a:b16,b:b15,x:b18,y:b17,back:b7,guide:b8,start:b6,leftstick:b23,rightstick:b24,leftshoulder:b19,rightshoulder:b20,dpup:b11,dpdown:b12,dpleft:b13,dpright:b14,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b21,righttrigger:b22,",
 	"0d0f0000000000004d00000000000000,HORI Gem Pad 3,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
 	"0d0f0000000000006600000000000000,HORIPAD FPS PLUS 4,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:b6,righttrigger:a4,",
+	"10080000000000000100000000000000,Twin USB Joystick,a:b4,b:b2,x:b6,y:b0,back:b16,start:b18,leftstick:b20,rightstick:b22,leftshoulder:b12,rightshoulder:b14,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a2,rightx:a6,righty:a4,lefttrigger:b8,righttrigger:b10,",
 	"10280000000000000900000000000000,8Bitdo SFC30 GamePad,a:b1,b:b0,x:b4,y:b3,back:b10,start:b11,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"2509000000000000e803000000000000,Mayflash Wii Classic Controller,a:b1,b:b0,x:b3,y:b2,back:b8,guide:b10,start:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:b11,dpdown:b13,dpleft:b12,dpright:b14,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
 	"351200000000000021ab000000000000,SFC30 Joystick,a:b1,b:b0,x:b4,y:b3,back:b10,start:b11,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"4c050000000000006802000000000000,PS3 Controller,a:b14,b:b13,back:b0,dpdown:b6,dpleft:b7,dpright:b5,dpup:b4,guide:b16,leftshoulder:b10,leftstick:b1,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b11,rightstick:b2,righttrigger:b9,rightx:a2,righty:a3,start:b3,x:b15,y:b12,",
+	"4c05000000000000a00b000000000000,Sony DualShock 4 Wireless Adaptor,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b13,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a3,righttrigger:a4,",
 	"4c05000000000000c405000000000000,PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,",
 	"4c05000000000000cc09000000000000,Sony DualShock 4 V2,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b13,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a3,righttrigger:a4,",
 	"4f0400000000000000b3000000000000,Thrustmaster Firestorm Dual Power,a:b0,b:b2,y:b3,x:b1,start:b10,guide:b8,back:b9,leftstick:b11,rightstick:,leftshoulder:b4,rightshoulder:b6,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b5,righttrigger:b7,",
@@ -607,9 +660,9 @@ static const char *s_ControllerMappings[] = {
 	"5e04000000000000dd02000000000000,Xbox One Wired Controller,x:b2,a:b0,b:b1,y:b3,back:b9,guide:b10,start:b8,dpleft:b13,dpdown:b12,dpright:b14,dpup:b11,leftshoulder:b4,lefttrigger:a2,rightshoulder:b5,righttrigger:a5,leftstick:b6,rightstick:b7,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
 	"5e04000000000000e002000000000000,Xbox Wireless Controller,x:b2,a:b0,b:b1,y:b3,back:b6,guide:b10,start:b7,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:a2,rightshoulder:b5,righttrigger:a5,leftstick:b8,rightstick:b9,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
 	"5e04000000000000ea02000000000000,Xbox Wireless Controller,x:b2,a:b0,b:b1,y:b3,back:b9,guide:b10,start:b8,dpleft:b13,dpdown:b12,dpright:b14,dpup:b11,leftshoulder:b4,lefttrigger:a2,rightshoulder:b5,righttrigger:a5,leftstick:b6,rightstick:b7,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
-	"6d0400000000000016c2000000000000,Logitech F310 Gamepad (DInput),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,", /* Guide button doesn't seem to be sent in DInput mode. */
+	"6d0400000000000016c2000000000000,Logitech F310 Gamepad (DInput),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,",
 	"6d0400000000000018c2000000000000,Logitech F510 Gamepad (DInput),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,",
-	"6d0400000000000019c2000000000000,Logitech Wireless Gamepad (DInput),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,", /* This includes F710 in DInput mode and the "Logitech Cordless RumblePad 2", at the very least. */
+	"6d0400000000000019c2000000000000,Logitech Wireless Gamepad (DInput),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,",
 	"6d040000000000001fc2000000000000,Logitech F710 Gamepad (XInput),a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,",
 	"79000000000000000018000000000000,Mayflash WiiU Pro Game Controller Adapter (DInput),a:b4,b:b8,x:b0,y:b12,back:b32,start:b36,leftstick:b40,rightstick:b44,leftshoulder:b16,rightshoulder:b20,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a4,rightx:a8,righty:a12,lefttrigger:b24,righttrigger:b28,",
 	"79000000000000000600000000000000,G-Shark GP-702,a:b2,b:b1,x:b3,y:b0,back:b8,start:b9,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:b6,righttrigger:b7,",
@@ -618,6 +671,7 @@ static const char *s_ControllerMappings[] = {
 	"83050000000000006020000000000000,iBuffalo USB 2-axis 8-button Gamepad,a:b1,b:b0,x:b3,y:b2,back:b6,start:b7,leftshoulder:b4,rightshoulder:b5,leftx:a0,lefty:a1,",
 	"891600000000000000fd000000000000,Razer Onza Tournament,a:b0,b:b1,y:b3,x:b2,start:b8,guide:b10,back:b9,leftstick:b6,rightstick:b7,leftshoulder:b4,rightshoulder:b5,dpup:b11,dpleft:b13,dpdown:b12,dpright:b14,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:a2,righttrigger:a5,",
 	"8f0e0000000000000300000000000000,Piranha xtreme,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a2,",
+	"AD1B00000000000001F9000000000000,Gamestop BB-070 X360 Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,",
 	"ad1b00000000000001f9000000000000,Gamestop BB-070 X360 Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,",
 	"b4040000000000000a01000000000000,Sega Saturn USB Gamepad,a:b0,b:b1,x:b3,y:b4,back:b5,guide:b2,start:b8,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"bd1200000000000015d0000000000000,Tomee SNES USB Controller,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,leftshoulder:b4,rightshoulder:b5,leftx:a0,lefty:a1,",
@@ -629,7 +683,7 @@ static const char *s_ControllerMappings[] = {
 	"0300000000f000000300000000010000,RetroUSB.com RetroPad,a:b1,b:b5,x:b0,y:b4,back:b2,start:b3,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"0300000000f00000f100000000010000,RetroUSB.com Super RetroPort,a:b1,b:b5,x:b0,y:b4,back:b2,start:b3,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"030000000d0f00000d00000000010000,hori,a:b0,b:b6,y:b2,x:b1,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,start:b9,guide:b10,back:b8,leftshoulder:b3,rightshoulder:b7,leftx:b4,lefty:b5,",
-	"030000000d0f00001000000011010000,HORI CO.,LTD. FIGHTING STICK 3,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7",
+	"030000000d0f00001000000011010000,HORI CO. LTD. FIGHTING STICK 3,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7",
 	"030000000d0f00002200000011010000,HORI CO.,LTD. REAL ARCADE Pro.V3,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,",
 	"030000000d0f00004d00000011010000,HORI Gem Pad 3,x:b0,a:b1,b:b2,y:b3,back:b8,guide:b12,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
 	"03000000100800000100000010010000,Twin USB PS2 Adapter,a:b2,b:b1,y:b0,x:b3,start:b9,guide:,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a2,lefttrigger:b4,righttrigger:b5,",
@@ -638,13 +692,16 @@ static const char *s_ControllerMappings[] = {
 	"03000000250900000500000000010000,Sony PS2 pad with SmartJoy adapter,a:b2,b:b1,y:b0,x:b3,start:b8,back:b9,leftstick:b10,rightstick:b11,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b4,righttrigger:b5,",
 	"03000000260900008888000000010000,GameCube {WiseGroup USB box},a:b0,b:b2,y:b3,x:b1,start:b7,leftshoulder:,rightshoulder:b6,dpup:h0.1,dpleft:h0.8,rightstick:,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:a4,righttrigger:a5,",
 	"03000000280400000140000000010000,Gravis GamePad Pro USB ,x:b0,a:b1,b:b2,y:b3,back:b8,start:b9,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftx:a0,lefty:a1,",
+	"03000000300f00001001000010010000,Jess Tech Dual Analog Rumble Pad,x:b0,a:b2,b:b3,y:b1,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b5,rightshoulder:b6,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a2,",
 	"03000000341a000005f7000010010000,GameCube {HuiJia USB box},a:b1,b:b2,y:b3,x:b0,start:b9,guide:,back:,leftstick:,rightstick:,leftshoulder:,dpleft:b15,dpdown:b14,dpright:b13,leftx:a0,lefty:a1,rightx:a5,righty:a2,lefttrigger:a3,righttrigger:a4,rightshoulder:b7,dpup:b12,",
 	"03000000380700001647000010040000,Mad Catz Wired Xbox 360 Controller,x:b2,a:b0,b:b1,y:b3,back:b6,guide:b8,start:b7,dpleft:h0.8,dpdown:h0.0,dpdown:h0.4,dpright:h0.0,dpright:h0.2,dpup:h0.0,dpup:h0.1,leftshoulder:h0.0,leftshoulder:b4,lefttrigger:a2,rightshoulder:b5,righttrigger:a5,leftstick:b9,rightstick:b10,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
+	"03000000451300000830000010010000,NYKO CORE,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:b6,righttrigger:b7,",
 	"030000004c0500006802000011010000,PS3 Controller,a:b14,b:b13,back:b0,dpdown:b6,dpleft:b7,dpright:b5,dpup:b4,guide:b16,leftshoulder:b10,leftstick:b1,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b11,rightstick:b2,righttrigger:b9,rightx:a2,righty:a3,start:b3,x:b15,y:b12,",
 	"030000004c050000a00b000011010000,Sony DualShock 4 Wireless Adaptor,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b13,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a3,righttrigger:a4,",
 	"030000004c050000c405000011010000,Sony DualShock 4,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a3,righttrigger:a4,",
 	"030000004c050000c405000011810000,Sony Computer Entertainment Wireless Controller,leftx:a0,lefty:a1,dpdown:h0.4,rightstick:h0.1,rightshoulder:b5,rightx:a3,start:b9,righty:a4,dpleft:h0.8,lefttrigger:a2,x:b3,dpup:h0.1,back:b8,leftstick:b11,leftshoulder:b4,y:b2,a:b0,dpright:h0.2,righttrigger:a5,b:b1,",
 	"030000004c050000cc09000011010000,Sony DualShock 4 V2,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b13,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a3,righttrigger:a4,",
+	"030000004c050000cc09000011810000,Sony DualShock 4 (CUH-ZCT2U) (USB),a:b0,b:b1,y:b2,x:b3,leftshoulder:b4,rightshoulder:b5,back:b8,start:b9,guide:b10,leftstick:b11,rightstick:b12,leftx:a0,lefty:a1,lefttrigger:a2,rightx:a3,righty:a4,righttrigger:a5,dpup:h0.1,dpright:h0.2,dpdown:h0.4,dpleft:h0.8,",
 	"030000004f04000000b3000010010000,Thrustmaster Firestorm Dual Power,a:b0,b:b2,y:b3,x:b1,start:b10,guide:b8,back:b9,leftstick:b11,rightstick:b12,leftshoulder:b4,rightshoulder:b6,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b5,righttrigger:b7,",
 	"030000004f04000008d0000000010000,Thrustmaster Run N Drive  Wireless,a:b1,b:b2,x:b0,y:b3,start:b9,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:b6,righttrigger:b7,",
 	"030000004f04000009d0000000010000,Thrustmaster Run N Drive Wireless PS3,a:b1,b:b2,x:b0,y:b3,start:b9,guide:b12,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
@@ -684,14 +741,15 @@ static const char *s_ControllerMappings[] = {
 	"030000006f0e00003001000001010000,EA Sports PS3 Controller,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
 	"030000006f0e00003901000020060000,Afterglow Wired Controller for Xbox One,x:b2,a:b0,b:b1,y:b3,back:b6,guide:b8,start:b7,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:a2,rightshoulder:b5,righttrigger:a5,leftstick:b9,rightstick:b10,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
 	"030000006f0e00004601000001010000,Rock Candy Wired Controller for Xbox One,a:b0,b:b1,x:b2,y:b3,leftshoulder:b4,rightshoulder:b5,back:b6,start:b7,guide:b8,leftstick:b9,rightstick:b10,lefttrigger:a2,righttrigger:a5,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
+	"03000000780000000600000010010000,Microntek USB Joystick,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,leftx:a0,lefty:a1,",
 	"03000000790000000600000010010000,DragonRise Inc. Generic USB Joystick,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
 	"03000000790000001100000010010000,Retrolink Classic Controller,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,leftshoulder:b4,rightshoulder:b5,leftx:a0,lefty:a1,",
-	"03000000790000001100000010010000,RetroLink Saturn Classic Controller,x:b3,a:b0,b:b1,y:b4,back:b5,guide:b2,start:b8,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"03000000830500006020000010010000,iBuffalo USB 2-axis 8-button Gamepad,a:b1,b:b0,x:b3,y:b2,back:b6,start:b7,leftshoulder:b4,rightshoulder:b5,leftx:a0,lefty:a1,",
 	"030000008916000000fd000024010000,Razer Onza Tournament,a:b0,b:b1,y:b3,x:b2,start:b7,guide:b8,back:b6,leftstick:b9,rightstick:b10,leftshoulder:b4,rightshoulder:b5,dpup:b13,dpleft:b11,dpdown:b14,dpright:b12,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:a2,righttrigger:a5,",
 	"030000008916000001fd000024010000,Razer Onza Classic Edition,x:b2,a:b0,b:b1,y:b3,back:b6,guide:b8,start:b7,dpleft:b11,dpdown:b14,dpright:b12,dpup:b13,leftshoulder:b4,lefttrigger:a2,rightshoulder:b5,righttrigger:a5,leftstick:b9,rightstick:b10,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
 	"030000008f0e00000300000010010000,GreenAsia Inc. USB Joystick,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,dpleft:h0.8,dpdown:h0.0,dpdown:h0.4,dpright:h0.0,dpright:h0.2,dpup:h0.0,dpup:h0.1,leftshoulder:h0.0,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a2,",
 	"030000008f0e00001200000010010000,GreenAsia Inc. USB Joystick,x:b2,a:b0,b:b1,y:b3,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b5,rightshoulder:b6,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a2,",
+	"030000009b2800000300000001010000,raphnet.net 4nes4snes v1.5,x:b1,a:b0,b:b4,y:b5,back:b2,start:b3,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"03000000a30600000901000000010000,Saitek P880,a:b2,b:b3,y:b1,x:b0,leftstick:b8,rightstick:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a2,lefttrigger:b6,righttrigger:b7,",
 	"03000000a30600000c04000011010000,Saitek P2900 Wireless Pad,a:b1,b:b2,y:b3,x:b0,start:b12,guide:b9,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a2,lefttrigger:b4,righttrigger:b5,",
 	"03000000a306000018f5000010010000,Saitek PLC Saitek P3200 Rumble Pad,x:b0,a:b1,b:b2,y:b3,back:b8,start:b9,dpleft:h0.8,dpdown:h0.0,dpdown:h0.4,dpright:h0.0,dpright:h0.2,dpup:h0.0,dpup:h0.1,leftshoulder:h0.0,leftshoulder:b4,lefttrigger:a2,rightshoulder:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
@@ -701,7 +759,9 @@ static const char *s_ControllerMappings[] = {
 	"03000000ad1b00002ef0000090040000,Mad Catz Fightpad SFxT,a:b0,b:b1,y:b3,x:b2,start:b7,guide:b8,back:b6,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,lefttrigger:a2,righttrigger:a5,",
 	"03000000ba2200002010000001010000,Jess Technology USB Game Controller,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,rightx:a3,righty:a2,start:b9,x:b3,y:b0,",
 	"03000000bd12000015d0000010010000,Tomee SNES USB Controller,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,leftshoulder:b4,rightshoulder:b5,leftx:a0,lefty:a1,",
-	"03000000c9110000f055000011010000,HJC Game GAMEPAD,platform:Linux,x:b2,a:b0,b:b1,y:b3,back:b4,back:b8,start:b9,dpleft:h0.8,dpdown:h0.0,dpdown:h0.4,dpright:h0.0,dpright:h0.2,dpup:h0.0,dpup:h0.1,leftshoulder:h0.0,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,",
+	"03000000c01600008704000011010000,Serial/Keyboard/Mouse/Joystick,a:b12,b:b10,x:b13,y:b11,back:b4,start:b5,leftstick:b14,rightstick:b15,leftshoulder:b9,rightshoulder:b8,dpup:b0,dpdown:b2,dpleft:b3,dpright:b1,leftx:a1,lefty:a0,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"03000000c0160000e105000001010000,Xin-Mo Xin-Mo Dual Arcade,y:b0,x:b1,b:b3,a:b4,leftshoulder:b2,rightshoulder:b5,back:b6,start:b7,guide:b9,dpleft:b13,dpdown:b12,dpright:b14,dpup:b11,leftx:a0,lefty:a1,",
+	"03000000c9110000f055000011010000,HJC Game GAMEPAD,x:b2,a:b0,b:b1,y:b3,back:b4,back:b8,start:b9,dpleft:h0.8,dpdown:h0.0,dpdown:h0.4,dpright:h0.0,dpright:h0.2,dpup:h0.0,dpup:h0.1,leftshoulder:h0.0,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a2,righty:a3,platform:Linux,",
 	"03000000d814000007cd000011010000,Toodles 2008 Chimp PC/PS3,a:b0,b:b1,y:b2,x:b3,start:b9,back:b8,leftshoulder:b4,rightshoulder:b5,leftx:a0,lefty:a1,lefttrigger:b6,righttrigger:b7,",
 	"03000000d81400000862000011010000,HitBox (PS3/PC) Analog Mode,a:b1,b:b2,y:b3,x:b0,start:b12,guide:b9,back:b8,leftshoulder:b4,rightshoulder:b5,lefttrigger:b6,righttrigger:b7,leftx:a0,lefty:a1,",
 	"03000000de280000ff11000001000000,Valve Streaming Gamepad,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,",
@@ -709,6 +769,7 @@ static const char *s_ControllerMappings[] = {
 	"03000000fd0500000030000000010000,InterAct GoPad I-73000 (Fighting Game Layout),a:b3,b:b4,y:b1,x:b0,start:b7,back:b6,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:b5,",
 	"03000000fd0500002a26000000010000,3dfx InterAct HammerHead FX,leftx:a0,lefty:a1,dpdown:h0.4,rightstick:b5,rightshoulder:b7,rightx:a2,start:b11,righty:a3,dpleft:h0.8,lefttrigger:b8,x:b0,dpup:h0.1,back:b10,leftstick:b2,leftshoulder:b6,y:b1,a:b3,dpright:h0.2,righttrigger:b9,b:b4,",
 	"03000000ff1100003133000010010000,PC Game Controller,a:b2,b:b1,y:b0,x:b3,start:b9,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
+	"03000000ff1100004133000010010000,GreenAsia Inc.USB Joystick,x:b3,a:b2,b:b1,y:b0,back:b8,start:b9,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,leftstick:b10,rightstick:b11,leftx:a0,lefty:a1,rightx:a3,righty:a2,",
 	"05000000010000000100000003000000,Nintendo Wiimote,a:b0,b:b1,y:b3,x:b2,start:b9,guide:b10,back:b8,leftstick:b11,rightstick:b12,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,",
 	"05000000102800000900000000010000,8Bitdo SFC30 GamePad,x:b4,a:b1,b:b0,y:b3,back:b10,start:b11,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"05000000362800000100000002010000,OUYA Game Controller,a:b0,b:b3,dpdown:b9,dpleft:b10,dpright:b11,dpup:b8,guide:b14,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,x:b1,y:b2,",
@@ -719,11 +780,15 @@ static const char *s_ControllerMappings[] = {
 	"050000004c0500006802000000010000,PS3 Controller (Bluetooth),a:b14,b:b13,y:b12,x:b15,start:b3,guide:b16,back:b0,leftstick:b1,rightstick:b2,leftshoulder:b10,rightshoulder:b11,dpup:b4,dpleft:b7,dpdown:b6,dpright:b5,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b8,righttrigger:b9,",
 	"050000004c050000c405000000010000,PS4 Controller (Bluetooth),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,",
 	"050000004c050000cc09000000010000,Sony DualShock 4 V2 BT,a:b1,b:b2,y:b3,x:b0,start:b9,guide:b12,back:b13,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a5,lefttrigger:a3,righttrigger:a4,",
+	"050000004c050000cc09000000810000,Sony DualShock 4 (CUH-ZCT2U) (Bluetooth),a:b0,b:b1,y:b2,x:b3,leftshoulder:b4,rightshoulder:b5,back:b8,start:b9,guide:b10,leftstick:b11,rightstick:b12,leftx:a0,lefty:a1,lefttrigger:a2,rightx:a3,righty:a4,righttrigger:a5,dpup:h0.1,dpright:h0.2,dpdown:h0.4,dpleft:h0.8,",
+	"05000000504c415953544154494f4e00,PS3 Controller (Bluetooth),a:b14,b:b13,y:b12,x:b15,start:b3,guide:b16,back:b0,leftstick:b1,rightstick:b2,leftshoulder:b10,rightshoulder:b11,dpup:b4,dpleft:b7,dpdown:b6,dpright:b5,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b8,righttrigger:b9,",
+	"050000005e040000e002000003090000,Xbox One Wireless Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,",
 	"050000007e0500003003000001000000,Nintendo Wii U Pro Controller,a:b0,b:b1,x:b3,y:b2,back:b8,start:b9,guide:b10,leftshoulder:b4,rightshoulder:b5,leftstick:b11,rightstick:b12,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,dpup:b13,dpleft:b15,dpdown:b14,dpright:b16,",
 	"05000000a00500003232000001000000,8Bitdo Zero GamePad,a:b0,b:b1,x:b3,y:b4,back:b10,start:b11,leftshoulder:b6,rightshoulder:b7,leftx:a0,lefty:a1,",
 	"05000000ac0500003232000001000000,VR-BOX,a:b0,b:b1,x:b2,y:b3,start:b9,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a2,lefttrigger:b4,righttrigger:b5,",
 	"05000000d6200000ad0d000001000000,Moga Pro,a:b0,b:b1,y:b3,x:b2,start:b6,leftstick:b7,rightstick:b8,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:a5,righttrigger:a4,",
 	"060000004c0500006802000000010000,PS3 Controller (Bluetooth),a:b14,b:b13,y:b12,x:b15,start:b3,guide:b16,back:b0,leftstick:b1,rightstick:b2,leftshoulder:b10,rightshoulder:b11,dpup:b4,dpleft:b7,dpdown:b6,dpright:b5,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b8,righttrigger:b9,",
+	"06000000adde0000efbe000002010000,Hidromancer Game Controller,x:b2,a:b0,b:b1,y:b3,back:b6,guide:b8,start:b7,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:a2,rightshoulder:b5,righttrigger:a5,leftstick:b9,rightstick:b10,leftx:a0,lefty:a1,rightx:a3,righty:a4,",
 #endif
 
 #if defined(__ANDROID__)
diff --git a/main/splash_sponsors.png b/main/splash_sponsors.png
new file mode 100644
index 0000000000..d8677f1749
--- /dev/null
+++ b/main/splash_sponsors.png
diff --git a/misc/hooks/pre-commit-clang-format b/misc/hooks/pre-commit-clang-format
index d2d65a7428..8ef4e27748 100755
--- a/misc/hooks/pre-commit-clang-format
+++ b/misc/hooks/pre-commit-clang-format
@@ -31,7 +31,7 @@ PARSE_EXTS=true
 
 # File types to parse. Only effective when PARSE_EXTS is true.
 # FILE_EXTS=".c .h .cpp .hpp"
-FILE_EXTS=".c .h .cpp .hpp .cc .hh .cxx .m .mm .inc"
+FILE_EXTS=".c .h .cpp .hpp .cc .hh .cxx .m .mm .inc .java"
 
 # Use pygmentize instead of cat to parse diff with highlighting.
 # Install it with `pip install pygments` (Linux) or `easy_install Pygments` (Mac)
diff --git a/misc/travis/clang-format.sh b/misc/travis/clang-format.sh
index 2b30cf5ada..d1e37cc10e 100755
--- a/misc/travis/clang-format.sh
+++ b/misc/travis/clang-format.sh
@@ -11,7 +11,7 @@ else
     RANGE=HEAD
 fi
 
-FILES=$(git diff-tree --no-commit-id --name-only -r $RANGE | grep -v thirdparty/ | grep -E "\.(c|h|cpp|hpp|cc|hh|cxx|m|mm|inc)$")
+FILES=$(git diff-tree --no-commit-id --name-only -r $RANGE | grep -v thirdparty/ | grep -E "\.(c|h|cpp|hpp|cc|hh|cxx|m|mm|inc|java)$")
 echo "Checking files:\n$FILES"
 
 # create a random filename to store our generated patch
diff --git a/modules/bullet/bullet_physics_server.cpp b/modules/bullet/bullet_physics_server.cpp
index 339dccce33..b233edc0d4 100644
--- a/modules/bullet/bullet_physics_server.cpp
+++ b/modules/bullet/bullet_physics_server.cpp
@@ -723,16 +723,16 @@ void BulletPhysicsServer::body_set_axis_velocity(RID p_body, const Vector3 &p_ax
 	body->set_linear_velocity(v);
 }
 
-void BulletPhysicsServer::body_set_axis_lock(RID p_body, int axis, bool p_lock) {
+void BulletPhysicsServer::body_set_axis_lock(RID p_body, BodyAxis p_axis, bool p_lock) {
 	RigidBodyBullet *body = rigid_body_owner.get(p_body);
 	ERR_FAIL_COND(!body);
-	body->set_axis_lock(axis, p_lock);
+	body->set_axis_lock(p_axis, p_lock);
 }
 
-bool BulletPhysicsServer::body_get_axis_lock(RID p_body) const {
+bool BulletPhysicsServer::body_is_axis_locked(RID p_body, BodyAxis p_axis) const {
 	const RigidBodyBullet *body = rigid_body_owner.get(p_body);
 	ERR_FAIL_COND_V(!body, 0);
-	return body->get_axis_lock();
+	return body->is_axis_locked(p_axis);
 }
 
 void BulletPhysicsServer::body_add_collision_exception(RID p_body, RID p_body_b) {
diff --git a/modules/bullet/bullet_physics_server.h b/modules/bullet/bullet_physics_server.h
index ed5acb9041..8a10c87fc6 100644
--- a/modules/bullet/bullet_physics_server.h
+++ b/modules/bullet/bullet_physics_server.h
@@ -226,8 +226,8 @@ public:
 	virtual void body_apply_torque_impulse(RID p_body, const Vector3 &p_impulse);
 	virtual void body_set_axis_velocity(RID p_body, const Vector3 &p_axis_velocity);
 
-	virtual void body_set_axis_lock(RID p_body, int axis, bool p_lock);
-	virtual bool body_get_axis_lock(RID p_body) const;
+	virtual void body_set_axis_lock(RID p_body, BodyAxis p_axis, bool p_lock);
+	virtual bool body_is_axis_locked(RID p_body, BodyAxis p_axis) const;
 
 	virtual void body_add_collision_exception(RID p_body, RID p_body_b);
 	virtual void body_remove_collision_exception(RID p_body, RID p_body_b);
diff --git a/modules/bullet/collision_object_bullet.cpp b/modules/bullet/collision_object_bullet.cpp
index da3a4b73cf..88d4108f82 100644
--- a/modules/bullet/collision_object_bullet.cpp
+++ b/modules/bullet/collision_object_bullet.cpp
@@ -76,11 +76,17 @@ bool equal(real_t first, real_t second) {
 
 void CollisionObjectBullet::set_body_scale(const Vector3 &p_new_scale) {
 	if (!equal(p_new_scale[0], body_scale[0]) || !equal(p_new_scale[1], body_scale[1]) || !equal(p_new_scale[2], body_scale[2])) {
-		G_TO_B(p_new_scale, body_scale);
+		body_scale = p_new_scale;
 		on_body_scale_changed();
 	}
 }
 
+btVector3 CollisionObjectBullet::get_bt_body_scale() const {
+	btVector3 s;
+	G_TO_B(body_scale, s);
+	return s;
+}
+
 void CollisionObjectBullet::on_body_scale_changed() {
 }
 
@@ -160,6 +166,7 @@ void CollisionObjectBullet::set_transform(const Transform &p_global_transform) {
 Transform CollisionObjectBullet::get_transform() const {
 	Transform t;
 	B_TO_G(get_transform__bullet(), t);
+	t.basis.scale(body_scale);
 	return t;
 }
 
@@ -302,7 +309,7 @@ void RigidCollisionObjectBullet::on_shapes_changed() {
 		}
 	}
 
-	compoundShape->setLocalScaling(body_scale);
+	compoundShape->setLocalScaling(get_bt_body_scale());
 	compoundShape->recalculateLocalAabb();
 }
 
diff --git a/modules/bullet/collision_object_bullet.h b/modules/bullet/collision_object_bullet.h
index 51e48909e4..7d4659b64e 100644
--- a/modules/bullet/collision_object_bullet.h
+++ b/modules/bullet/collision_object_bullet.h
@@ -114,7 +114,7 @@ protected:
 	bool m_isStatic;
 	bool ray_pickable;
 	btCollisionObject *bt_collision_object;
-	btVector3 body_scale;
+	Vector3 body_scale;
 	SpaceBullet *space;
 
 	VSet<RID> exceptions;
@@ -146,6 +146,8 @@ public:
 	_FORCE_INLINE_ bool is_ray_pickable() const { return ray_pickable; }
 
 	void set_body_scale(const Vector3 &p_new_scale);
+	const Vector3 &get_body_scale() const { return body_scale; }
+	btVector3 get_bt_body_scale() const;
 	virtual void on_body_scale_changed();
 
 	void add_collision_exception(const CollisionObjectBullet *p_ignoreCollisionObject);
diff --git a/modules/bullet/cone_twist_joint_bullet.cpp b/modules/bullet/cone_twist_joint_bullet.cpp
index 7ae5e79645..738835b910 100644
--- a/modules/bullet/cone_twist_joint_bullet.cpp
+++ b/modules/bullet/cone_twist_joint_bullet.cpp
@@ -37,11 +37,21 @@
 
 ConeTwistJointBullet::ConeTwistJointBullet(RigidBodyBullet *rbA, RigidBodyBullet *rbB, const Transform &rbAFrame, const Transform &rbBFrame) :
 		JointBullet() {
+
+	Transform scaled_AFrame(rbAFrame.scaled(rbA->get_body_scale()));
+	scaled_AFrame.basis.rotref_posscale_decomposition(scaled_AFrame.basis);
+
 	btTransform btFrameA;
-	G_TO_B(rbAFrame, btFrameA);
+	G_TO_B(scaled_AFrame, btFrameA);
+
 	if (rbB) {
+
+		Transform scaled_BFrame(rbBFrame.scaled(rbB->get_body_scale()));
+		scaled_BFrame.basis.rotref_posscale_decomposition(scaled_BFrame.basis);
+
 		btTransform btFrameB;
-		G_TO_B(rbBFrame, btFrameB);
+		G_TO_B(scaled_BFrame, btFrameB);
+
 		coneConstraint = bulletnew(btConeTwistConstraint(*rbA->get_bt_rigid_body(), *rbB->get_bt_rigid_body(), btFrameA, btFrameB));
 	} else {
 		coneConstraint = bulletnew(btConeTwistConstraint(*rbA->get_bt_rigid_body(), btFrameA));
diff --git a/modules/bullet/generic_6dof_joint_bullet.cpp b/modules/bullet/generic_6dof_joint_bullet.cpp
index 28928bd861..da09d4e12f 100644
--- a/modules/bullet/generic_6dof_joint_bullet.cpp
+++ b/modules/bullet/generic_6dof_joint_bullet.cpp
@@ -38,12 +38,20 @@
 Generic6DOFJointBullet::Generic6DOFJointBullet(RigidBodyBullet *rbA, RigidBodyBullet *rbB, const Transform &frameInA, const Transform &frameInB, bool useLinearReferenceFrameA) :
 		JointBullet() {
 
+	Transform scaled_AFrame(frameInA.scaled(rbA->get_body_scale()));
+
+	scaled_AFrame.basis.rotref_posscale_decomposition(scaled_AFrame.basis);
+
 	btTransform btFrameA;
-	G_TO_B(frameInA, btFrameA);
+	G_TO_B(scaled_AFrame, btFrameA);
 
 	if (rbB) {
+		Transform scaled_BFrame(frameInB.scaled(rbB->get_body_scale()));
+
+		scaled_BFrame.basis.rotref_posscale_decomposition(scaled_BFrame.basis);
+
 		btTransform btFrameB;
-		G_TO_B(frameInB, btFrameB);
+		G_TO_B(scaled_BFrame, btFrameB);
 
 		sixDOFConstraint = bulletnew(btGeneric6DofConstraint(*rbA->get_bt_rigid_body(), *rbB->get_bt_rigid_body(), btFrameA, btFrameB, useLinearReferenceFrameA));
 	} else {
@@ -109,10 +117,12 @@ void Generic6DOFJointBullet::set_param(Vector3::Axis p_axis, PhysicsServer::G6DO
 	ERR_FAIL_INDEX(p_axis, 3);
 	switch (p_param) {
 		case PhysicsServer::G6DOF_JOINT_LINEAR_LOWER_LIMIT:
-			sixDOFConstraint->getTranslationalLimitMotor()->m_lowerLimit[p_axis] = p_value;
+			limits_lower[0][p_axis] = p_value;
+			set_flag(p_axis, PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_LINEAR_LIMIT, flags[p_axis][PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_LINEAR_LIMIT]); // Re-apply the cached linear limit flag so Bullet picks up the new bound
 			break;
 		case PhysicsServer::G6DOF_JOINT_LINEAR_UPPER_LIMIT:
-			sixDOFConstraint->getTranslationalLimitMotor()->m_upperLimit[p_axis] = p_value;
+			limits_upper[0][p_axis] = p_value;
+			set_flag(p_axis, PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_LINEAR_LIMIT, flags[p_axis][PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_LINEAR_LIMIT]); // Re-apply the cached linear limit flag so Bullet picks up the new bound
 			break;
 		case PhysicsServer::G6DOF_JOINT_LINEAR_LIMIT_SOFTNESS:
 			sixDOFConstraint->getTranslationalLimitMotor()->m_limitSoftness = p_value;
@@ -124,10 +134,12 @@ void Generic6DOFJointBullet::set_param(Vector3::Axis p_axis, PhysicsServer::G6DO
 			sixDOFConstraint->getTranslationalLimitMotor()->m_damping = p_value;
 			break;
 		case PhysicsServer::G6DOF_JOINT_ANGULAR_LOWER_LIMIT:
-			sixDOFConstraint->getRotationalLimitMotor(p_axis)->m_loLimit = p_value;
+			limits_lower[1][p_axis] = p_value;
+			set_flag(p_axis, PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_ANGULAR_LIMIT, flags[p_axis][PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_ANGULAR_LIMIT]); // Re-apply the cached angular limit flag so Bullet picks up the new bound
 			break;
 		case PhysicsServer::G6DOF_JOINT_ANGULAR_UPPER_LIMIT:
-			sixDOFConstraint->getRotationalLimitMotor(p_axis)->m_hiLimit = p_value;
+			limits_upper[1][p_axis] = p_value;
+			set_flag(p_axis, PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_ANGULAR_LIMIT, flags[p_axis][PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_ANGULAR_LIMIT]); // Re-apply the cached angular limit flag so Bullet picks up the new bound
 			break;
 		case PhysicsServer::G6DOF_JOINT_ANGULAR_LIMIT_SOFTNESS:
 			sixDOFConstraint->getRotationalLimitMotor(p_axis)->m_limitSoftness = p_value;
@@ -159,9 +171,9 @@ real_t Generic6DOFJointBullet::get_param(Vector3::Axis p_axis, PhysicsServer::G6
 	ERR_FAIL_INDEX_V(p_axis, 3, 0.);
 	switch (p_param) {
 		case PhysicsServer::G6DOF_JOINT_LINEAR_LOWER_LIMIT:
-			return sixDOFConstraint->getTranslationalLimitMotor()->m_lowerLimit[p_axis];
+			return limits_lower[0][p_axis];
 		case PhysicsServer::G6DOF_JOINT_LINEAR_UPPER_LIMIT:
-			return sixDOFConstraint->getTranslationalLimitMotor()->m_upperLimit[p_axis];
+			return limits_upper[0][p_axis];
 		case PhysicsServer::G6DOF_JOINT_LINEAR_LIMIT_SOFTNESS:
 			return sixDOFConstraint->getTranslationalLimitMotor()->m_limitSoftness;
 		case PhysicsServer::G6DOF_JOINT_LINEAR_RESTITUTION:
@@ -169,9 +181,9 @@ real_t Generic6DOFJointBullet::get_param(Vector3::Axis p_axis, PhysicsServer::G6
 		case PhysicsServer::G6DOF_JOINT_LINEAR_DAMPING:
 			return sixDOFConstraint->getTranslationalLimitMotor()->m_damping;
 		case PhysicsServer::G6DOF_JOINT_ANGULAR_LOWER_LIMIT:
-			return sixDOFConstraint->getRotationalLimitMotor(p_axis)->m_loLimit;
+			return limits_lower[1][p_axis];
 		case PhysicsServer::G6DOF_JOINT_ANGULAR_UPPER_LIMIT:
-			return sixDOFConstraint->getRotationalLimitMotor(p_axis)->m_hiLimit;
+			return limits_upper[1][p_axis];
 		case PhysicsServer::G6DOF_JOINT_ANGULAR_LIMIT_SOFTNESS:
 			return sixDOFConstraint->getRotationalLimitMotor(p_axis)->m_limitSoftness;
 		case PhysicsServer::G6DOF_JOINT_ANGULAR_DAMPING:
@@ -194,48 +206,35 @@ real_t Generic6DOFJointBullet::get_param(Vector3::Axis p_axis, PhysicsServer::G6
 
 void Generic6DOFJointBullet::set_flag(Vector3::Axis p_axis, PhysicsServer::G6DOFJointAxisFlag p_flag, bool p_value) {
 	ERR_FAIL_INDEX(p_axis, 3);
+	ERR_FAIL_INDEX(p_flag, PhysicsServer::G6DOF_JOINT_FLAG_MAX); // Reject unknown flags before they are used to index the flags cache
+	flags[p_axis][p_flag] = p_value;
+
 	switch (p_flag) {
 		case PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_LINEAR_LIMIT:
-			if (p_value) {
-				if (!get_flag(p_axis, p_flag)) // avoid overwrite, if limited
-					sixDOFConstraint->setLimit(p_axis, 0, 0); // Limited
+			if (flags[p_axis][p_flag]) {
+				sixDOFConstraint->setLimit(p_axis, limits_lower[0][p_axis], limits_upper[0][p_axis]);
 			} else {
-				if (get_flag(p_axis, p_flag)) // avoid overwrite, if free
-					sixDOFConstraint->setLimit(p_axis, 0, -1); // Free
+				sixDOFConstraint->setLimit(p_axis, 0, -1); // Free
 			}
 			break;
-		case PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_ANGULAR_LIMIT: {
-			int angularAxis = 3 + p_axis;
-			if (p_value) {
-				if (!get_flag(p_axis, p_flag)) // avoid overwrite, if Limited
-					sixDOFConstraint->setLimit(angularAxis, 0, 0); // Limited
+		case PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_ANGULAR_LIMIT:
+			if (flags[p_axis][p_flag]) {
+				sixDOFConstraint->setLimit(p_axis + 3, limits_lower[1][p_axis], limits_upper[1][p_axis]);
 			} else {
-				if (get_flag(p_axis, p_flag)) // avoid overwrite, if free
-					sixDOFConstraint->setLimit(angularAxis, 0, -1); // Free
+				sixDOFConstraint->setLimit(p_axis + 3, 0, -1); // Free
 			}
 			break;
-		}
 		case PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_MOTOR:
-			//sixDOFConstraint->getTranslationalLimitMotor()->m_enableMotor[p_axis] = p_value;
-			sixDOFConstraint->getRotationalLimitMotor(p_axis)->m_enableMotor = p_value;
+			sixDOFConstraint->getRotationalLimitMotor(p_axis)->m_enableMotor = flags[p_axis][p_flag];
 			break;
 		default:
 			WARN_PRINT("This flag is not supported by Bullet engine");
+			return;
 	}
 }
 
 bool Generic6DOFJointBullet::get_flag(Vector3::Axis p_axis, PhysicsServer::G6DOFJointAxisFlag p_flag) const {
 	ERR_FAIL_INDEX_V(p_axis, 3, false);
-	switch (p_flag) {
-		case PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_LINEAR_LIMIT:
-			return sixDOFConstraint->getTranslationalLimitMotor()->isLimited(p_axis);
-		case PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_ANGULAR_LIMIT:
-			return sixDOFConstraint->getRotationalLimitMotor(p_axis)->isLimited();
-		case PhysicsServer::G6DOF_JOINT_FLAG_ENABLE_MOTOR:
-			return //sixDOFConstraint->getTranslationalLimitMotor()->m_enableMotor[p_axis] &&
-					sixDOFConstraint->getRotationalLimitMotor(p_axis)->m_enableMotor;
-		default:
-			WARN_PRINT("This flag is not supported by Bullet engine");
-			return false;
-	}
+	ERR_FAIL_INDEX_V(p_flag, PhysicsServer::G6DOF_JOINT_FLAG_MAX, false); // Unknown flags report false instead of reading past the flags cache
+	return flags[p_axis][p_flag];
 }
diff --git a/modules/bullet/generic_6dof_joint_bullet.h b/modules/bullet/generic_6dof_joint_bullet.h
index 0d47b823de..ba0ae08800 100644
--- a/modules/bullet/generic_6dof_joint_bullet.h
+++ b/modules/bullet/generic_6dof_joint_bullet.h
@@ -39,6 +39,11 @@ class RigidBodyBullet;
 class Generic6DOFJointBullet : public JointBullet {
 	class btGeneric6DofConstraint *sixDOFConstraint;
 
+	// First is linear second is angular
+	Vector3 limits_lower[2];
+	Vector3 limits_upper[2];
+	bool flags[3][PhysicsServer::G6DOF_JOINT_FLAG_MAX];
+
 public:
 	Generic6DOFJointBullet(RigidBodyBullet *rbA, RigidBodyBullet *rbB, const Transform &frameInA, const Transform &frameInB, bool useLinearReferenceFrameA);
 
diff --git a/modules/bullet/godot_result_callbacks.h b/modules/bullet/godot_result_callbacks.h
index 5750dc2acd..9d2fb1fce4 100644
--- a/modules/bullet/godot_result_callbacks.h
+++ b/modules/bullet/godot_result_callbacks.h
@@ -50,14 +50,21 @@ struct GodotFilterCallback : public btOverlapFilterCallback {
 struct GodotClosestRayResultCallback : public btCollisionWorld::ClosestRayResultCallback {
 	const Set<RID> *m_exclude;
 	bool m_pickRay;
+	int m_shapeId;
 
 public:
 	GodotClosestRayResultCallback(const btVector3 &rayFromWorld, const btVector3 &rayToWorld, const Set<RID> *p_exclude) :
 			btCollisionWorld::ClosestRayResultCallback(rayFromWorld, rayToWorld),
 			m_exclude(p_exclude),
-			m_pickRay(false) {}
+			m_pickRay(false),
+			m_shapeId(0) {}
 
 	virtual bool needsCollision(btBroadphaseProxy *proxy0) const;
+
+	virtual btScalar addSingleResult(btCollisionWorld::LocalRayResult &rayResult, bool normalInWorldSpace) {
+		m_shapeId = rayResult.m_localShapeInfo ? rayResult.m_localShapeInfo->m_triangleIndex : 0; // "m_triangleIndex" is an odd name but contains the compound shape ID; m_localShapeInfo can be NULL, in which case shape 0 is reported
+		return btCollisionWorld::ClosestRayResultCallback::addSingleResult(rayResult, normalInWorldSpace);
+	}
 };
 
 // store all colliding object
diff --git a/modules/bullet/hinge_joint_bullet.cpp b/modules/bullet/hinge_joint_bullet.cpp
index d3288807b3..ee0d6707d6 100644
--- a/modules/bullet/hinge_joint_bullet.cpp
+++ b/modules/bullet/hinge_joint_bullet.cpp
@@ -37,12 +37,20 @@
 
 HingeJointBullet::HingeJointBullet(RigidBodyBullet *rbA, RigidBodyBullet *rbB, const Transform &frameA, const Transform &frameB) :
 		JointBullet() {
+
+	Transform scaled_AFrame(frameA.scaled(rbA->get_body_scale()));
+	scaled_AFrame.basis.rotref_posscale_decomposition(scaled_AFrame.basis);
+
 	btTransform btFrameA;
-	G_TO_B(frameA, btFrameA);
+	G_TO_B(scaled_AFrame, btFrameA);
 
 	if (rbB) {
+
+		Transform scaled_BFrame(frameB.scaled(rbB->get_body_scale()));
+		scaled_BFrame.basis.rotref_posscale_decomposition(scaled_BFrame.basis);
+
 		btTransform btFrameB;
-		G_TO_B(frameB, btFrameB);
+		G_TO_B(scaled_BFrame, btFrameB);
 
 		hingeConstraint = bulletnew(btHingeConstraint(*rbA->get_bt_rigid_body(), *rbB->get_bt_rigid_body(), btFrameA, btFrameB));
 	} else {
@@ -58,14 +66,14 @@ HingeJointBullet::HingeJointBullet(RigidBodyBullet *rbA, RigidBodyBullet *rbB, c
 
 	btVector3 btPivotA;
 	btVector3 btAxisA;
-	G_TO_B(pivotInA, btPivotA);
-	G_TO_B(axisInA, btAxisA);
+	G_TO_B(pivotInA * rbA->get_body_scale(), btPivotA);
+	G_TO_B(axisInA * rbA->get_body_scale(), btAxisA);
 
 	if (rbB) {
 		btVector3 btPivotB;
 		btVector3 btAxisB;
-		G_TO_B(pivotInB, btPivotB);
-		G_TO_B(axisInB, btAxisB);
+		G_TO_B(pivotInB * rbB->get_body_scale(), btPivotB);
+		G_TO_B(axisInB * rbB->get_body_scale(), btAxisB);
 
 		hingeConstraint = bulletnew(btHingeConstraint(*rbA->get_bt_rigid_body(), *rbB->get_bt_rigid_body(), btPivotA, btPivotB, btAxisA, btAxisB));
 	} else {
diff --git a/modules/bullet/pin_joint_bullet.cpp b/modules/bullet/pin_joint_bullet.cpp
index 8c74fcbc94..665e825967 100644
--- a/modules/bullet/pin_joint_bullet.cpp
+++ b/modules/bullet/pin_joint_bullet.cpp
@@ -40,8 +40,8 @@ PinJointBullet::PinJointBullet(RigidBodyBullet *p_body_a, const Vector3 &p_pos_a
 
 		btVector3 btPivotA;
 		btVector3 btPivotB;
-		G_TO_B(p_pos_a, btPivotA);
-		G_TO_B(p_pos_b, btPivotB);
+		G_TO_B(p_pos_a * p_body_a->get_body_scale(), btPivotA);
+		G_TO_B(p_pos_b * p_body_b->get_body_scale(), btPivotB);
 		p2pConstraint = bulletnew(btPoint2PointConstraint(*p_body_a->get_bt_rigid_body(),
 				*p_body_b->get_bt_rigid_body(),
 				btPivotA,
diff --git a/modules/bullet/rigid_body_bullet.cpp b/modules/bullet/rigid_body_bullet.cpp
index 843bdab31f..669b2c3f0c 100644
--- a/modules/bullet/rigid_body_bullet.cpp
+++ b/modules/bullet/rigid_body_bullet.cpp
@@ -198,6 +198,8 @@ void RigidBodyBullet::KinematicUtilities::copyAllOwnerShapes() {
 
 	const CollisionObjectBullet::ShapeWrapper *shape_wrapper;
 
+	btVector3 owner_body_scale(owner->get_bt_body_scale());
+
 	for (int i = shapes_count - 1; 0 <= i; --i) {
 		shape_wrapper = &shapes_wrappers[i];
 		if (!shape_wrapper->active) {
@@ -210,28 +212,29 @@ void RigidBodyBullet::KinematicUtilities::copyAllOwnerShapes() {
 		switch (shape_wrapper->shape->get_type()) {
 			case PhysicsServer::SHAPE_SPHERE: {
 				SphereShapeBullet *sphere = static_cast<SphereShapeBullet *>(shape_wrapper->shape);
-				kin_shape_ref = ShapeBullet::create_shape_sphere(sphere->get_radius() * owner->body_scale[0] + safe_margin);
+				kin_shape_ref = ShapeBullet::create_shape_sphere(sphere->get_radius() * owner_body_scale[0] + safe_margin);
 				break;
 			}
 			case PhysicsServer::SHAPE_BOX: {
 				BoxShapeBullet *box = static_cast<BoxShapeBullet *>(shape_wrapper->shape);
-				kin_shape_ref = ShapeBullet::create_shape_box((box->get_half_extents() * owner->body_scale) + btVector3(safe_margin, safe_margin, safe_margin));
+				kin_shape_ref = ShapeBullet::create_shape_box((box->get_half_extents() * owner_body_scale) + btVector3(safe_margin, safe_margin, safe_margin));
 				break;
 			}
 			case PhysicsServer::SHAPE_CAPSULE: {
 				CapsuleShapeBullet *capsule = static_cast<CapsuleShapeBullet *>(shape_wrapper->shape);
-				kin_shape_ref = ShapeBullet::create_shape_capsule(capsule->get_radius() * owner->body_scale[0] + safe_margin, capsule->get_height() * owner->body_scale[1] + safe_margin);
+
+				kin_shape_ref = ShapeBullet::create_shape_capsule(capsule->get_radius() * owner_body_scale[0] + safe_margin, capsule->get_height() * owner_body_scale[1] + safe_margin);
 				break;
 			}
 			case PhysicsServer::SHAPE_CONVEX_POLYGON: {
 				ConvexPolygonShapeBullet *godot_convex = static_cast<ConvexPolygonShapeBullet *>(shape_wrapper->shape);
 				kin_shape_ref = ShapeBullet::create_shape_convex(godot_convex->vertices);
-				kin_shape_ref->setLocalScaling(owner->body_scale + btVector3(safe_margin, safe_margin, safe_margin));
+				kin_shape_ref->setLocalScaling(owner_body_scale + btVector3(safe_margin, safe_margin, safe_margin));
 				break;
 			}
 			case PhysicsServer::SHAPE_RAY: {
 				RayShapeBullet *godot_ray = static_cast<RayShapeBullet *>(shape_wrapper->shape);
-				kin_shape_ref = ShapeBullet::create_shape_ray(godot_ray->length * owner->body_scale[1] + safe_margin);
+				kin_shape_ref = ShapeBullet::create_shape_ray(godot_ray->length * owner_body_scale[1] + safe_margin);
 				break;
 			}
 			default:
@@ -253,6 +256,7 @@ void RigidBodyBullet::KinematicUtilities::just_delete_shapes(int new_size) {
 RigidBodyBullet::RigidBodyBullet() :
 		RigidCollisionObjectBullet(CollisionObjectBullet::TYPE_RIGID_BODY),
 		kinematic_utilities(NULL),
+		locked_axis(0),
 		gravity_scale(1),
 		mass(1),
 		linearDamp(0),
@@ -277,7 +281,7 @@ RigidBodyBullet::RigidBodyBullet() :
 	setupBulletCollisionObject(btBody);
 
 	set_mode(PhysicsServer::BODY_MODE_RIGID);
-	set_axis_lock(0, locked_axis[0]);
+	reload_axis_lock();
 
 	areasWhereIam.resize(maxAreasWhereIam);
 	for (int i = areasWhereIam.size() - 1; 0 <= i; --i) {
@@ -498,25 +502,25 @@ void RigidBodyBullet::set_mode(PhysicsServer::BodyMode p_mode) {
 	switch (p_mode) {
 		case PhysicsServer::BODY_MODE_KINEMATIC:
 			mode = PhysicsServer::BODY_MODE_KINEMATIC;
-			set_axis_lock(0, locked_axis[0]); // Reload axis lock
+			reload_axis_lock();
 			_internal_set_mass(0);
 			init_kinematic_utilities();
 			break;
 		case PhysicsServer::BODY_MODE_STATIC:
 			mode = PhysicsServer::BODY_MODE_STATIC;
-			set_axis_lock(0, locked_axis[0]); // Reload axis lock
+			reload_axis_lock();
 			_internal_set_mass(0);
 			break;
 		case PhysicsServer::BODY_MODE_RIGID: {
 			mode = PhysicsServer::BODY_MODE_RIGID;
-			set_axis_lock(0, locked_axis[0]); // Reload axis lock
+			reload_axis_lock();
 			_internal_set_mass(0 == mass ? 1 : mass);
 			scratch_space_override_modificator();
 			break;
 		}
 		case PhysicsServer::BODY_MODE_CHARACTER: {
 			mode = PhysicsServer::BODY_MODE_CHARACTER;
-			set_axis_lock(0, locked_axis[0]); // Reload axis lock
+			reload_axis_lock();
 			_internal_set_mass(0 == mass ? 1 : mass);
 			scratch_space_override_modificator();
 			break;
@@ -655,25 +659,31 @@ Vector3 RigidBodyBullet::get_applied_torque() const {
 	return gTotTorq;
 }
 
-void RigidBodyBullet::set_axis_lock(int axis, bool p_lock) {
-	locked_axis[axis] = p_lock;
+void RigidBodyBullet::set_axis_lock(PhysicsServer::BodyAxis p_axis, bool lock) {
+	if (lock) {
+		locked_axis |= p_axis;
+	} else {
+		locked_axis &= ~p_axis;
+	}
 
-	btBody->setLinearFactor(btVector3(locked_axis[0] ? 0 : 1., locked_axis[1] ? 0 : 1., locked_axis[2] ? 0 : 1.));
-	if (locked_axis[0] || locked_axis[1] || locked_axis[2])
-		btBody->setAngularFactor(btVector3(locked_axis[0] ? 1. : 0, locked_axis[1] ? 1. : 0, locked_axis[2] ? 1. : 0));
-	else
-		btBody->setAngularFactor(btVector3(1., 1., 1.));
+	reload_axis_lock();
+}
 
+bool RigidBodyBullet::is_axis_locked(PhysicsServer::BodyAxis p_axis) const {
+	return locked_axis & p_axis;
+}
+
+void RigidBodyBullet::reload_axis_lock() {
+
+	btBody->setLinearFactor(btVector3(!is_axis_locked(PhysicsServer::BODY_AXIS_LINEAR_X), !is_axis_locked(PhysicsServer::BODY_AXIS_LINEAR_Y), !is_axis_locked(PhysicsServer::BODY_AXIS_LINEAR_Z)));
 	if (PhysicsServer::BODY_MODE_CHARACTER == mode) {
-		/// When character lock angular
+		/// When character angular is always locked
 		btBody->setAngularFactor(btVector3(0., 0., 0.));
+	} else {
+		btBody->setAngularFactor(btVector3(!is_axis_locked(PhysicsServer::BODY_AXIS_ANGULAR_X), !is_axis_locked(PhysicsServer::BODY_AXIS_ANGULAR_Y), !is_axis_locked(PhysicsServer::BODY_AXIS_ANGULAR_Z)));
 	}
 }
 
-bool RigidBodyBullet::get_axis_lock() const {
-	return locked_axis;
-}
-
 void RigidBodyBullet::set_continuous_collision_detection(bool p_enable) {
 	if (p_enable) {
 		// This threshold enable CCD if the object moves more than
diff --git a/modules/bullet/rigid_body_bullet.h b/modules/bullet/rigid_body_bullet.h
index fde8b21e17..c0eb148e24 100644
--- a/modules/bullet/rigid_body_bullet.h
+++ b/modules/bullet/rigid_body_bullet.h
@@ -184,9 +184,9 @@ private:
 	KinematicUtilities *kinematic_utilities;
 
 	PhysicsServer::BodyMode mode;
-	bool locked_axis[3] = { false, false, false };
 	GodotMotionState *godotMotionState;
 	btRigidBody *btBody;
+	uint16_t locked_axis;
 	real_t mass;
 	real_t gravity_scale;
 	real_t linearDamp;
@@ -269,8 +269,9 @@ public:
 	void set_applied_torque(const Vector3 &p_torque);
 	Vector3 get_applied_torque() const;
 
-	void set_axis_lock(int axis, bool p_lock);
-	bool get_axis_lock() const;
+	void set_axis_lock(PhysicsServer::BodyAxis p_axis, bool lock);
+	bool is_axis_locked(PhysicsServer::BodyAxis p_axis) const;
+	void reload_axis_lock();
 
 	/// Doc:
 	/// http://www.bulletphysics.org/mediawiki-1.5.8/index.php?title=Anti_tunneling_by_Motion_Clamping
diff --git a/modules/bullet/slider_joint_bullet.cpp b/modules/bullet/slider_joint_bullet.cpp
index f1d60679ec..cfcd0b57f6 100644
--- a/modules/bullet/slider_joint_bullet.cpp
+++ b/modules/bullet/slider_joint_bullet.cpp
@@ -37,11 +37,20 @@
 
 SliderJointBullet::SliderJointBullet(RigidBodyBullet *rbA, RigidBodyBullet *rbB, const Transform &frameInA, const Transform &frameInB) :
 		JointBullet() {
+
+	Transform scaled_AFrame(frameInA.scaled(rbA->get_body_scale()));
+	scaled_AFrame.basis.rotref_posscale_decomposition(scaled_AFrame.basis);
+
 	btTransform btFrameA;
-	G_TO_B(frameInA, btFrameA);
+	G_TO_B(scaled_AFrame, btFrameA);
+
 	if (rbB) {
+
+		Transform scaled_BFrame(frameInB.scaled(rbB->get_body_scale()));
+		scaled_BFrame.basis.rotref_posscale_decomposition(scaled_BFrame.basis);
+
 		btTransform btFrameB;
-		G_TO_B(frameInB, btFrameB);
+		G_TO_B(scaled_BFrame, btFrameB);
 		sliderConstraint = bulletnew(btSliderConstraint(*rbA->get_bt_rigid_body(), *rbB->get_bt_rigid_body(), btFrameA, btFrameB, true));
 
 	} else {
diff --git a/modules/bullet/space_bullet.cpp b/modules/bullet/space_bullet.cpp
index c1f6e81734..3ce4b294db 100644
--- a/modules/bullet/space_bullet.cpp
+++ b/modules/bullet/space_bullet.cpp
@@ -97,7 +97,7 @@ bool BulletPhysicsDirectSpaceState::intersect_ray(const Vector3 &p_from, const V
 		B_TO_G(btResult.m_hitNormalWorld.normalize(), r_result.normal);
 		CollisionObjectBullet *gObj = static_cast<CollisionObjectBullet *>(btResult.m_collisionObject->getUserPointer());
 		if (gObj) {
-			r_result.shape = 0;
+			r_result.shape = btResult.m_shapeId;
 			r_result.rid = gObj->get_self();
 			r_result.collider_id = gObj->get_instance_id();
 			r_result.collider = 0 == r_result.collider_id ? NULL : ObjectDB::get_instance(r_result.collider_id);
diff --git a/modules/gdnative/register_types.cpp b/modules/gdnative/register_types.cpp
index 365def75bc..1cb35ec006 100644
--- a/modules/gdnative/register_types.cpp
+++ b/modules/gdnative/register_types.cpp
@@ -99,12 +99,16 @@ static Set<String> get_gdnative_singletons(EditorFileSystemDirectory *p_dir) {
 }
 
 static void actual_discoverer_handler() {
+
 	EditorFileSystemDirectory *dir = EditorFileSystem::get_singleton()->get_filesystem();
 
 	Set<String> file_paths = get_gdnative_singletons(dir);
 
 	bool changed = false;
-	Array current_files = ProjectSettings::get_singleton()->get("gdnative/singletons");
+	Array current_files;
+	if (ProjectSettings::get_singleton()->has_setting("gdnative/singletons")) {
+		current_files = ProjectSettings::get_singleton()->get("gdnative/singletons");
+	}
 	Array files;
 	files.resize(file_paths.size());
 	int i = 0;
@@ -128,7 +132,6 @@ static void actual_discoverer_handler() {
 	if (changed) {
 
 		ProjectSettings::get_singleton()->set("gdnative/singletons", files);
-
 		ProjectSettings::get_singleton()->save();
 	}
 }
diff --git a/modules/gdscript/gdscript_function.cpp b/modules/gdscript/gdscript_function.cpp
index d2ce318f82..ee23f0ea0f 100644
--- a/modules/gdscript/gdscript_function.cpp
+++ b/modules/gdscript/gdscript_function.cpp
@@ -515,7 +515,7 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 					} else {
 						v = "of type '" + _get_var_type(index) + "'";
 					}
-					err_text = "Invalid set index " + v + " (on base: '" + _get_var_type(dst) + "') with value of type '"+_get_var_type(value)+"'";
+					err_text = "Invalid set index " + v + " (on base: '" + _get_var_type(dst) + "') with value of type '" + _get_var_type(value) + "'";
 					OPCODE_BREAK;
 				}
 #endif
@@ -574,7 +574,7 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a
 #ifdef DEBUG_ENABLED
 				if (!valid) {
 					String err_type;
-					err_text = "Invalid set index '" + String(*index) + "' (on base: '" + _get_var_type(dst) + "') with value of type '"+_get_var_type(value)+"'.";
+					err_text = "Invalid set index '" + String(*index) + "' (on base: '" + _get_var_type(dst) + "') with value of type '" + _get_var_type(value) + "'.";
 					OPCODE_BREAK;
 				}
 #endif
diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp
index 599f204184..36ae61e388 100644
--- a/modules/gdscript/gdscript_parser.cpp
+++ b/modules/gdscript/gdscript_parser.cpp
@@ -1140,6 +1140,7 @@ GDScriptParser::Node *GDScriptParser::_parse_expression(Node *p_parent, bool p_s
 			bool unary = false;
 			bool ternary = false;
 			bool error = false;
+			bool right_to_left = false;
 
 			switch (expression[i].op) {
 
@@ -1194,11 +1195,13 @@ GDScriptParser::Node *GDScriptParser::_parse_expression(Node *p_parent, bool p_s
 				case OperatorNode::OP_TERNARY_IF:
 					priority = 14;
 					ternary = true;
+					right_to_left = true;
 					break;
 				case OperatorNode::OP_TERNARY_ELSE:
 					priority = 14;
 					error = true;
-					break; // Errors out when found without IF (since IF would consume it)
+					// Right-to-left should be false in this case, otherwise it would always error.
+					break;
 
 				case OperatorNode::OP_ASSIGN: priority = 15; break;
 				case OperatorNode::OP_ASSIGN_ADD: priority = 15; break;
@@ -1218,13 +1221,13 @@ GDScriptParser::Node *GDScriptParser::_parse_expression(Node *p_parent, bool p_s
 				}
 			}
 
-			if (priority < min_priority) {
+			if (priority < min_priority || (right_to_left && priority == min_priority)) {
+				// < is used for left to right (default)
+				// <= is used for right to left
 				if (error) {
 					_set_error("Unexpected operator");
 					return NULL;
 				}
-				// < is used for left to right (default)
-				// <= is used for right to left
 				next_op = i;
 				min_priority = priority;
 				is_unary = unary;
diff --git a/modules/webp/SCsub b/modules/webp/SCsub
index f9295fed47..ea7af1bf9e 100644
--- a/modules/webp/SCsub
+++ b/modules/webp/SCsub
@@ -26,9 +26,6 @@ if env['builtin_libwebp']:
         "dsp/alpha_processing_neon.c",
         "dsp/alpha_processing_sse2.c",
         "dsp/alpha_processing_sse41.c",
-        "dsp/argb.c",
-        "dsp/argb_mips_dsp_r2.c",
-        "dsp/argb_sse2.c",
         "dsp/cost.c",
         "dsp/cost_mips32.c",
         "dsp/cost_mips_dsp_r2.c",
@@ -36,6 +33,9 @@ if env['builtin_libwebp']:
         "dsp/cpu.c",
         "dsp/dec.c",
         "dsp/dec_clip_tables.c",
+        "dsp/ssim.c",
+        "dsp/ssim_sse2.c",
+        "dsp/yuv_neon.c",
         "dsp/dec_mips32.c",
         "dsp/dec_mips_dsp_r2.c",
         "dsp/dec_msa.c",
@@ -84,6 +84,7 @@ if env['builtin_libwebp']:
         "dsp/yuv_sse2.c",
         "enc/alpha_enc.c",
         "enc/analysis_enc.c",
+        "enc/backward_references_cost_enc.c",
         "enc/backward_references_enc.c",
         "enc/config_enc.c",
         "enc/cost_enc.c",
@@ -122,10 +123,10 @@ if env['builtin_libwebp']:
         "utils/thread_utils.c",
         "utils/utils.c",
     ]
-    thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
+    thirdparty_sources = [thirdparty_dir + "src/" + file for file in thirdparty_sources]
 
     env_webp.add_source_files(env.modules_sources, thirdparty_sources)
-    env_webp.Append(CPPPATH=[thirdparty_dir])
+    env_webp.Append(CPPPATH=[thirdparty_dir, thirdparty_dir + "src/"])
 
 # Godot source files
 env_webp.add_source_files(env.modules_sources, "*.cpp")
diff --git a/platform/android/export/export.cpp b/platform/android/export/export.cpp
index 67e00f4952..255413bf2c 100644
--- a/platform/android/export/export.cpp
+++ b/platform/android/export/export.cpp
@@ -1557,12 +1557,15 @@ public:
 			encode_uint32(cl.size(), &clf[0]);
 			for (int i = 0; i < cl.size(); i++) {
 
+				print_line(itos(i) + " param: " + cl[i]);
 				CharString txt = cl[i].utf8();
 				int base = clf.size();
-				clf.resize(base + 4 + txt.length());
-				encode_uint32(txt.length(), &clf[base]);
-				copymem(&clf[base + 4], txt.ptr(), txt.length());
-				print_line(itos(i) + " param: " + cl[i]);
+				int length = txt.length();
+				if (!length)
+					continue;
+				clf.resize(base + 4 + length);
+				encode_uint32(length, &clf[base]);
+				copymem(&clf[base + 4], txt.ptr(), length);
 			}
 
 			zip_fileinfo zipfi = get_zip_fileinfo();
diff --git a/platform/android/java/src/org/godotengine/godot/Dictionary.java b/platform/android/java/src/org/godotengine/godot/Dictionary.java
index ed91fedd85..2bc9c083aa 100644
--- a/platform/android/java/src/org/godotengine/godot/Dictionary.java
+++ b/platform/android/java/src/org/godotengine/godot/Dictionary.java
@@ -32,7 +32,6 @@ package org.godotengine.godot;
 import java.util.HashMap;
 import java.util.Set;
 
-
 public class Dictionary extends HashMap<String, Object> {
 
 	protected String[] keys_cache;
@@ -40,7 +39,7 @@ public class Dictionary extends HashMap<String, Object> {
 	public String[] get_keys() {
 
 		String[] ret = new String[size()];
-		int i=0;
+		int i = 0;
 		Set<String> keys = keySet();
 		for (String key : keys) {
 
@@ -54,7 +53,7 @@ public class Dictionary extends HashMap<String, Object> {
 	public Object[] get_values() {
 
 		Object[] ret = new Object[size()];
-		int i=0;
+		int i = 0;
 		Set<String> keys = keySet();
 		for (String key : keys) {
 
@@ -71,7 +70,7 @@ public class Dictionary extends HashMap<String, Object> {
 
 	public void set_values(Object[] vals) {
 
-		int i=0;
+		int i = 0;
 		for (String key : keys_cache) {
 			put(key, vals[i]);
 			i++;
diff --git a/platform/android/java/src/org/godotengine/godot/Godot.java b/platform/android/java/src/org/godotengine/godot/Godot.java
index 4daf06142d..3939ceb2e7 100644
--- a/platform/android/java/src/org/godotengine/godot/Godot.java
+++ b/platform/android/java/src/org/godotengine/godot/Godot.java
@@ -90,35 +90,34 @@ import android.os.Bundle;
 import android.os.Messenger;
 import android.os.SystemClock;
 
-
-public class Godot extends Activity implements SensorEventListener, IDownloaderClient
-{
+public class Godot extends Activity implements SensorEventListener, IDownloaderClient {
 
 	static final int MAX_SINGLETONS = 64;
 	private IStub mDownloaderClientStub;
-    private IDownloaderService mRemoteService;
-    private TextView mStatusText;
-    private TextView mProgressFraction;
-    private TextView mProgressPercent;
-    private TextView mAverageSpeed;
-    private TextView mTimeRemaining;
-    private ProgressBar mPB;
-
-    private View mDashboard;
-    private View mCellMessage;
-
-    private Button mPauseButton;
-    private Button mWiFiSettingsButton;
-
-    private boolean use_32_bits=false;
-    private boolean use_immersive=false;
-    private boolean mStatePaused;
-    private int mState;
-	private boolean keep_screen_on=true;
+	private IDownloaderService mRemoteService;
+	private TextView mStatusText;
+	private TextView mProgressFraction;
+	private TextView mProgressPercent;
+	private TextView mAverageSpeed;
+	private TextView mTimeRemaining;
+	private ProgressBar mPB;
+
+	private View mDashboard;
+	private View mCellMessage;
+
+	private Button mPauseButton;
+	private Button mWiFiSettingsButton;
+
+	private boolean use_32_bits = false;
+	private boolean use_immersive = false;
+	private boolean mStatePaused;
+	private int mState;
+	private boolean keep_screen_on = true;
 
 	static private Intent mCurrentIntent;
 
-	@Override public void onNewIntent(Intent intent) {
+	@Override
+	public void onNewIntent(Intent intent) {
 		mCurrentIntent = intent;
 	}
 
@@ -127,43 +126,43 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 	}
 
 	private void setState(int newState) {
-        if (mState != newState) {
-            mState = newState;
-            mStatusText.setText(Helpers.getDownloaderStringResourceIDFromState(newState));
-        }
-    }
-
-    private void setButtonPausedState(boolean paused) {
-        mStatePaused = paused;
-        int stringResourceID = paused ? com.godot.game.R.string.text_button_resume :
-        	com.godot.game.R.string.text_button_pause;
-        mPauseButton.setText(stringResourceID);
-    }
+		if (mState != newState) {
+			mState = newState;
+			mStatusText.setText(Helpers.getDownloaderStringResourceIDFromState(newState));
+		}
+	}
+
+	private void setButtonPausedState(boolean paused) {
+		mStatePaused = paused;
+		int stringResourceID = paused ? com.godot.game.R.string.text_button_resume :
+										com.godot.game.R.string.text_button_pause;
+		mPauseButton.setText(stringResourceID);
+	}
 
 	static public class SingletonBase {
 
 		protected void registerClass(String p_name, String[] p_methods) {
 
-			GodotLib.singleton(p_name,this);
+			GodotLib.singleton(p_name, this);
 
 			Class clazz = getClass();
 			Method[] methods = clazz.getDeclaredMethods();
 			for (Method method : methods) {
-				boolean found=false;
-				Log.d("XXX","METHOD: %s\n" + method.getName());
+				boolean found = false;
+				Log.d("XXX", "METHOD: %s\n" + method.getName());
 
 				for (String s : p_methods) {
-				Log.d("XXX", "METHOD CMP WITH: %s\n" + s);
+					Log.d("XXX", "METHOD CMP WITH: %s\n" + s);
 					if (s.equals(method.getName())) {
-						found=true;
-						Log.d("XXX","METHOD CMP VALID");
+						found = true;
+						Log.d("XXX", "METHOD CMP VALID");
 						break;
 					}
 				}
 				if (!found)
 					continue;
 
-				Log.d("XXX","METHOD FOUND: %s\n" + method.getName());
+				Log.d("XXX", "METHOD FOUND: %s\n" + method.getName());
 
 				List<String> ptr = new ArrayList<String>();
 
@@ -175,17 +174,13 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 				String[] pt = new String[ptr.size()];
 				ptr.toArray(pt);
 
-				GodotLib.method(p_name,method.getName(),method.getReturnType().getName(),pt);
-
-
+				GodotLib.method(p_name, method.getName(), method.getReturnType().getName(), pt);
 			}
 
-			Godot.singletons[Godot.singleton_count++]=this;
+			Godot.singletons[Godot.singleton_count++] = this;
 		}
 
 		protected void onMainActivityResult(int requestCode, int resultCode, Intent data) {
-
-
 		}
 
 		protected void onMainPause() {}
@@ -200,22 +195,20 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 		public void registerMethods() {}
 	}
 
-/*
+	/*
 	protected List<SingletonBase> singletons = new ArrayList<SingletonBase>();
 	protected void instanceSingleton(SingletonBase s) {
 
 		s.registerMethods();
 		singletons.add(s);
 	}
-
-*/
+	*/
 
 	private String[] command_line;
 	private boolean use_apk_expansion;
 
 	public GodotView mView;
-	private boolean godot_initialized=false;
-
+	private boolean godot_initialized = false;
 
 	private SensorManager mSensorManager;
 	private Sensor mAccelerometer;
@@ -226,36 +219,34 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 	public FrameLayout layout;
 	public RelativeLayout adLayout;
 
-
 	static public GodotIO io;
 
 	public static void setWindowTitle(String title) {
 		//setTitle(title);
 	}
 
-
 	static SingletonBase singletons[] = new SingletonBase[MAX_SINGLETONS];
-	static int singleton_count=0;
-
+	static int singleton_count = 0;
 
 	public interface ResultCallback {
 		public void callback(int requestCode, int resultCode, Intent data);
-	};
+	}
 	public ResultCallback result_callback;
 
 	private PaymentsManager mPaymentsManager = null;
 
-	@Override protected void onActivityResult (int requestCode, int resultCode, Intent data) {
-		if(requestCode == PaymentsManager.REQUEST_CODE_FOR_PURCHASE){
+	@Override
+	protected void onActivityResult(int requestCode, int resultCode, Intent data) {
+		if (requestCode == PaymentsManager.REQUEST_CODE_FOR_PURCHASE) {
 			mPaymentsManager.processPurchaseResponse(resultCode, data);
-		}else if (result_callback != null) {
+		} else if (result_callback != null) {
 			result_callback.callback(requestCode, resultCode, data);
 			result_callback = null;
 		};
 
-		for(int i=0;i<singleton_count;i++) {
+		for (int i = 0; i < singleton_count; i++) {
 
-			singletons[i].onMainActivityResult(requestCode,resultCode,data);
+			singletons[i].onMainActivityResult(requestCode, resultCode, data);
 		}
 	};
 
@@ -265,38 +256,38 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 		//setContentView(mView);
 
 		layout = new FrameLayout(this);
-		layout.setLayoutParams(new LayoutParams(LayoutParams.FILL_PARENT,LayoutParams.FILL_PARENT));
+		layout.setLayoutParams(new LayoutParams(LayoutParams.FILL_PARENT, LayoutParams.FILL_PARENT));
 		setContentView(layout);
 
 		// GodotEditText layout
 		GodotEditText edittext = new GodotEditText(this);
-		   edittext.setLayoutParams(new ViewGroup.LayoutParams(LayoutParams.FILL_PARENT,LayoutParams.WRAP_CONTENT));
-        // ...add to FrameLayout
-		   layout.addView(edittext);
+		edittext.setLayoutParams(new ViewGroup.LayoutParams(LayoutParams.FILL_PARENT, LayoutParams.WRAP_CONTENT));
+		// ...add to FrameLayout
+		layout.addView(edittext);
 
-		mView = new GodotView(getApplication(),io,use_gl2,use_32_bits, this);
-		layout.addView(mView,new LayoutParams(LayoutParams.FILL_PARENT,LayoutParams.FILL_PARENT));
+		mView = new GodotView(getApplication(), io, use_gl2, use_32_bits, this);
+		layout.addView(mView, new LayoutParams(LayoutParams.FILL_PARENT, LayoutParams.FILL_PARENT));
 		edittext.setView(mView);
 		io.setEdit(edittext);
 
 		final Godot godot = this;
 		mView.getViewTreeObserver().addOnGlobalLayoutListener(new ViewTreeObserver.OnGlobalLayoutListener() {
-				@Override
-				public void onGlobalLayout() {
-					Point fullSize = new Point();
-					godot.getWindowManager().getDefaultDisplay().getSize(fullSize);
-					Rect gameSize = new Rect();
-					godot.mView.getWindowVisibleDisplayFrame(gameSize);
-
-					final int keyboardHeight = fullSize.y - gameSize.bottom;
-					Log.d("GODOT", "setVirtualKeyboardHeight: " + keyboardHeight);
-					GodotLib.setVirtualKeyboardHeight(keyboardHeight);
-				}
+			@Override
+			public void onGlobalLayout() {
+				Point fullSize = new Point();
+				godot.getWindowManager().getDefaultDisplay().getSize(fullSize);
+				Rect gameSize = new Rect();
+				godot.mView.getWindowVisibleDisplayFrame(gameSize);
+
+				final int keyboardHeight = fullSize.y - gameSize.bottom;
+				Log.d("GODOT", "setVirtualKeyboardHeight: " + keyboardHeight);
+				GodotLib.setVirtualKeyboardHeight(keyboardHeight);
+			}
 		});
 
 		// Ad layout
 		adLayout = new RelativeLayout(this);
-		adLayout.setLayoutParams(new LayoutParams(LayoutParams.FILL_PARENT,LayoutParams.FILL_PARENT));
+		adLayout.setLayoutParams(new LayoutParams(LayoutParams.FILL_PARENT, LayoutParams.FILL_PARENT));
 		layout.addView(adLayout);
 
 		final String[] current_command_line = command_line;
@@ -313,12 +304,11 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 				});
 			}
 		});
-
 	}
 
 	public void setKeepScreenOn(final boolean p_enabled) {
 		keep_screen_on = p_enabled;
-		if (mView != null){
+		if (mView != null) {
 			runOnUiThread(new Runnable() {
 				@Override
 				public void run() {
@@ -335,12 +325,12 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 				AlertDialog.Builder builder = new AlertDialog.Builder(getInstance());
 				builder.setMessage(message).setTitle(title);
 				builder.setPositiveButton(
-					"OK",
-					new DialogInterface.OnClickListener() {
-						public void onClick(DialogInterface dialog, int id) {
-							dialog.cancel();
-						}
-					});
+						"OK",
+						new DialogInterface.OnClickListener() {
+							public void onClick(DialogInterface dialog, int id) {
+								dialog.cancel();
+							}
+						});
 				AlertDialog dialog = builder.create();
 				dialog.show();
 			}
@@ -349,91 +339,86 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 
 	private static Godot _self;
 
-	public static Godot getInstance(){
+	public static Godot getInstance() {
 		return Godot._self;
 	}
 
-
 	private String[] getCommandLine() {
-            InputStream is;
-            try {
-		is = getAssets().open("_cl_");
-                byte[] len = new byte[4];
-                int r = is.read(len);
-		if (r<4) {
-                    Log.d("XXX","**ERROR** Wrong cmdline length.\n");
-		    Log.d("GODOT", "**ERROR** Wrong cmdline length.\n");
-                    return new String[0];
-                }
-		int argc=((int)(len[3]&0xFF)<<24) | ((int)(len[2]&0xFF)<<16) | ((int)(len[1]&0xFF)<<8) | ((int)(len[0]&0xFF));
-                String[] cmdline = new String[argc];
-
-                for(int i=0;i<argc;i++) {
-                    r = is.read(len);
-                    if (r<4) {
-
-			Log.d("GODOT", "**ERROR** Wrong cmdline param length.\n");
-                        return new String[0];
-                    }
-		    int strlen=((int)(len[3]&0xFF)<<24) | ((int)(len[2]&0xFF)<<16) | ((int)(len[1]&0xFF)<<8) | ((int)(len[0]&0xFF));
-                    if (strlen>65535) {
-			Log.d("GODOT", "**ERROR** Wrong command len\n");
-                        return new String[0];
-                    }
-		    byte[] arg = new byte[strlen];
-                    r = is.read(arg);
-		    if (r==strlen) {
-                        cmdline[i]=new String(arg,"UTF-8");
-		    }
+		InputStream is;
+		try {
+			is = getAssets().open("_cl_");
+			byte[] len = new byte[4];
+			int r = is.read(len);
+			if (r < 4) {
+				Log.d("XXX", "**ERROR** Wrong cmdline length.\n");
+				Log.d("GODOT", "**ERROR** Wrong cmdline length.\n");
+				return new String[0];
+			}
+			int argc = ((int)(len[3] & 0xFF) << 24) | ((int)(len[2] & 0xFF) << 16) | ((int)(len[1] & 0xFF) << 8) | ((int)(len[0] & 0xFF));
+			String[] cmdline = new String[argc];
+
+			for (int i = 0; i < argc; i++) {
+				r = is.read(len);
+				if (r < 4) {
+
+					Log.d("GODOT", "**ERROR** Wrong cmdline param length.\n");
+					return new String[0];
+				}
+				int strlen = ((int)(len[3] & 0xFF) << 24) | ((int)(len[2] & 0xFF) << 16) | ((int)(len[1] & 0xFF) << 8) | ((int)(len[0] & 0xFF));
+				if (strlen > 65535) {
+					Log.d("GODOT", "**ERROR** Wrong command len\n");
+					return new String[0];
+				}
+				byte[] arg = new byte[strlen];
+				r = is.read(arg);
+				if (r == strlen) {
+					cmdline[i] = new String(arg, "UTF-8");
+				}
 			}
 			return cmdline;
 		} catch (Exception e) {
-		e.printStackTrace();
-		Log.d("GODOT", "**ERROR** Exception " + e.getClass().getName() + ":" + e.getMessage());
+			e.printStackTrace();
+			Log.d("GODOT", "**ERROR** Exception " + e.getClass().getName() + ":" + e.getMessage());
 			return new String[0];
 		}
-
-
 	}
 
-
 	String expansion_pack_path;
 
-
 	private void initializeGodot() {
 
-		if (expansion_pack_path!=null) {
+		if (expansion_pack_path != null) {
 
 			String[] new_cmdline;
-			int cll=0;
-			if (command_line!=null) {
-			        Log.d("GODOT", "initializeGodot: command_line: is not null" );
-				new_cmdline = new String[ command_line.length + 2 ];
-				cll=command_line.length;
-				for(int i=0;i<command_line.length;i++) {
-					new_cmdline[i]=command_line[i];
+			int cll = 0;
+			if (command_line != null) {
+				Log.d("GODOT", "initializeGodot: command_line: is not null");
+				new_cmdline = new String[command_line.length + 2];
+				cll = command_line.length;
+				for (int i = 0; i < command_line.length; i++) {
+					new_cmdline[i] = command_line[i];
 				}
 			} else {
-			        Log.d("GODOT", "initializeGodot: command_line: is null" );
-				new_cmdline = new String[ 2 ];
+				Log.d("GODOT", "initializeGodot: command_line: is null");
+				new_cmdline = new String[2];
 			}
 
-			new_cmdline[cll]="--main_pack";
-			new_cmdline[cll+1]=expansion_pack_path;
-			command_line=new_cmdline;
+			new_cmdline[cll] = "--main_pack";
+			new_cmdline[cll + 1] = expansion_pack_path;
+			command_line = new_cmdline;
 		}
 
 		io = new GodotIO(this);
 		io.unique_id = Secure.getString(getContentResolver(), Secure.ANDROID_ID);
-		GodotLib.io=io;
-		Log.d("GODOT", "command_line is null? " + ((command_line == null)?"yes":"no"));
+		GodotLib.io = io;
+		Log.d("GODOT", "command_line is null? " + ((command_line == null) ? "yes" : "no"));
 		/*if(command_line != null){
 		    Log.d("GODOT", "Command Line:");
 		    for(int w=0;w <command_line.length;w++){
 		        Log.d("GODOT","   " + command_line[w]);
 		    }
 		}*/
-		mSensorManager = (SensorManager) getSystemService(Context.SENSOR_SERVICE);
+		mSensorManager = (SensorManager)getSystemService(Context.SENSOR_SERVICE);
 		mAccelerometer = mSensorManager.getDefaultSensor(Sensor.TYPE_ACCELEROMETER);
 		mSensorManager.registerListener(this, mAccelerometer, SensorManager.SENSOR_DELAY_GAME);
 		mGravity = mSensorManager.getDefaultSensor(Sensor.TYPE_GRAVITY);
@@ -449,18 +434,15 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 
 		mPaymentsManager = PaymentsManager.createManager(this).initService();
 
-		godot_initialized=true;
-
+		godot_initialized = true;
 	}
 
 	@Override
 	public void onServiceConnected(Messenger m) {
-	    mRemoteService = DownloaderServiceMarshaller.CreateProxy(m);
-	    mRemoteService.onClientUpdated(mDownloaderClientStub.getMessenger());
+		mRemoteService = DownloaderServiceMarshaller.CreateProxy(m);
+		mRemoteService.onClientUpdated(mDownloaderClientStub.getMessenger());
 	}
 
-
-
 	@Override
 	protected void onCreate(Bundle icicle) {
 
@@ -474,59 +456,58 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 
 		//check for apk expansion API
 		if (true) {
-		        boolean md5mismatch = false;
+			boolean md5mismatch = false;
 			command_line = getCommandLine();
-			String main_pack_md5=null;
-			String main_pack_key=null;
+			String main_pack_md5 = null;
+			String main_pack_key = null;
 
 			List<String> new_args = new LinkedList<String>();
 
+			for (int i = 0; i < command_line.length; i++) {
 
-			for(int i=0;i<command_line.length;i++) {
-
-				boolean has_extra = i< command_line.length -1;
+				boolean has_extra = i < command_line.length - 1;
 				if (command_line[i].equals("--use_depth_32")) {
-					use_32_bits=true;
+					use_32_bits = true;
 				} else if (command_line[i].equals("--use_immersive")) {
-					use_immersive=true;
-					if(Build.VERSION.SDK_INT >= 19.0){ // check if the application runs on an android 4.4+
+					use_immersive = true;
+					if (Build.VERSION.SDK_INT >= 19.0) { // check if the application runs on an android 4.4+
 						window.getDecorView().setSystemUiVisibility(
-								    View.SYSTEM_UI_FLAG_LAYOUT_STABLE
-									    | View.SYSTEM_UI_FLAG_LAYOUT_HIDE_NAVIGATION
-									    | View.SYSTEM_UI_FLAG_LAYOUT_FULLSCREEN
-									    | View.SYSTEM_UI_FLAG_HIDE_NAVIGATION // hide nav bar
-									    | View.SYSTEM_UI_FLAG_FULLSCREEN // hide status bar
-									    | View.SYSTEM_UI_FLAG_IMMERSIVE_STICKY);
+								View.SYSTEM_UI_FLAG_LAYOUT_STABLE |
+								View.SYSTEM_UI_FLAG_LAYOUT_HIDE_NAVIGATION |
+								View.SYSTEM_UI_FLAG_LAYOUT_FULLSCREEN |
+								View.SYSTEM_UI_FLAG_HIDE_NAVIGATION | // hide nav bar
+								View.SYSTEM_UI_FLAG_FULLSCREEN | // hide status bar
+								View.SYSTEM_UI_FLAG_IMMERSIVE_STICKY);
 
 						UiChangeListener();
 					}
 				} else if (command_line[i].equals("--use_apk_expansion")) {
-					use_apk_expansion=true;
+					use_apk_expansion = true;
 				} else if (has_extra && command_line[i].equals("--apk_expansion_md5")) {
-					main_pack_md5=command_line[i+1];
+					main_pack_md5 = command_line[i + 1];
 					i++;
 				} else if (has_extra && command_line[i].equals("--apk_expansion_key")) {
-					main_pack_key=command_line[i+1];
+					main_pack_key = command_line[i + 1];
 					SharedPreferences prefs = getSharedPreferences("app_data_keys", MODE_PRIVATE);
 					Editor editor = prefs.edit();
 					editor.putString("store_public_key", main_pack_key);
 
 					editor.commit();
 					i++;
-				} else if (command_line[i].trim().length()!=0){
+				} else if (command_line[i].trim().length() != 0) {
 					new_args.add(command_line[i]);
 				}
 			}
 
-			if (new_args.isEmpty()){
-				command_line=null;
-			}else{
+			if (new_args.isEmpty()) {
+				command_line = null;
+			} else {
 
 				command_line = new_args.toArray(new String[new_args.size()]);
-                        }
-			if (use_apk_expansion && main_pack_md5!=null && main_pack_key!=null) {
+			}
+			if (use_apk_expansion && main_pack_md5 != null && main_pack_key != null) {
 				//check that environment is ok!
-				if (!Environment.getExternalStorageState().equals( Environment.MEDIA_MOUNTED )) {
+				if (!Environment.getExternalStorageState().equals(Environment.MEDIA_MOUNTED)) {
 					Log.d("GODOT", "**ERROR! No media mounted!");
 					//show popup and die
 				}
@@ -534,7 +515,7 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 				// Build the full path to the app's expansion files
 				try {
 					expansion_pack_path = Helpers.getSaveFilePath(getApplicationContext());
-					expansion_pack_path+="/"+"main."+getPackageManager().getPackageInfo(getPackageName(), 0).versionCode+"."+this.getPackageName()+".obb";
+					expansion_pack_path += "/main." + getPackageManager().getPackageInfo(getPackageName(), 0).versionCode + "." + this.getPackageName() + ".obb";
 				} catch (Exception e) {
 					e.printStackTrace();
 				}
@@ -542,20 +523,20 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 				File f = new File(expansion_pack_path);
 
 				boolean pack_valid = true;
-				Log.d("GODOT","**PACK** - Path "+expansion_pack_path);
+				Log.d("GODOT", "**PACK** - Path " + expansion_pack_path);
 
 				if (!f.exists()) {
 
-					pack_valid=false;
-					Log.d("GODOT","**PACK** - File does not exist");
+					pack_valid = false;
+					Log.d("GODOT", "**PACK** - File does not exist");
 
-				} else if(  obbIsCorrupted(expansion_pack_path, main_pack_md5)){
+				} else if (obbIsCorrupted(expansion_pack_path, main_pack_md5)) {
 					Log.d("GODOT", "**PACK** - Expansion pack (obb) is corrupted");
 					pack_valid = false;
-					try{
-					    f.delete();
-					}catch(Exception e){
-					    Log.d("GODOT", "**PACK** - Error deleting corrupted expansion pack (obb)");
+					try {
+						f.delete();
+					} catch (Exception e) {
+						Log.d("GODOT", "**PACK** - Error deleting corrupted expansion pack (obb)");
 					}
 				}
 
@@ -564,12 +545,12 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 
 					Intent notifierIntent = new Intent(this, this.getClass());
 					notifierIntent.setFlags(Intent.FLAG_ACTIVITY_NEW_TASK |
-			                                Intent.FLAG_ACTIVITY_CLEAR_TOP);
+											Intent.FLAG_ACTIVITY_CLEAR_TOP);
 
-                                                PendingIntent pendingIntent = PendingIntent.getActivity(this, 0,
-			                notifierIntent, PendingIntent.FLAG_UPDATE_CURRENT);
+					PendingIntent pendingIntent = PendingIntent.getActivity(this, 0,
+							notifierIntent, PendingIntent.FLAG_UPDATE_CURRENT);
 
-			        int startResult;
+					int startResult;
 					try {
 						Log.d("GODOT", "INITIALIZING DOWNLOAD");
 						startResult = DownloaderClientMarshaller.startDownloadServiceIfRequired(
@@ -578,36 +559,34 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 								GodotDownloaderService.class);
 						Log.d("GODOT", "DOWNLOAD SERVICE FINISHED:" + startResult);
 
-			        if (startResult != DownloaderClientMarshaller.NO_DOWNLOAD_REQUIRED) {
-						Log.d("GODOT", "DOWNLOAD REQUIRED");
-			            // This is where you do set up to display the download
-			            // progress (next step)
-			        	mDownloaderClientStub = DownloaderClientMarshaller.CreateStub(this,
-			        			GodotDownloaderService.class);
-
-			        	setContentView(com.godot.game.R.layout.downloading_expansion);
-			        	mPB = (ProgressBar) findViewById(com.godot.game.R.id.progressBar);
-			            mStatusText = (TextView) findViewById(com.godot.game.R.id.statusText);
-			            mProgressFraction = (TextView) findViewById(com.godot.game.R.id.progressAsFraction);
-			            mProgressPercent = (TextView) findViewById(com.godot.game.R.id.progressAsPercentage);
-			            mAverageSpeed = (TextView) findViewById(com.godot.game.R.id.progressAverageSpeed);
-			            mTimeRemaining = (TextView) findViewById(com.godot.game.R.id.progressTimeRemaining);
-			            mDashboard = findViewById(com.godot.game.R.id.downloaderDashboard);
-			            mCellMessage = findViewById(com.godot.game.R.id.approveCellular);
-			            mPauseButton = (Button) findViewById(com.godot.game.R.id.pauseButton);
-			            mWiFiSettingsButton = (Button) findViewById(com.godot.game.R.id.wifiSettingsButton);
-
-			            return;
-			        } else{
-			        	Log.d("GODOT", "NO DOWNLOAD REQUIRED");
-			        }
+						if (startResult != DownloaderClientMarshaller.NO_DOWNLOAD_REQUIRED) {
+							Log.d("GODOT", "DOWNLOAD REQUIRED");
+							// This is where you do set up to display the download
+							// progress (next step)
+							mDownloaderClientStub = DownloaderClientMarshaller.CreateStub(this,
+									GodotDownloaderService.class);
+
+							setContentView(com.godot.game.R.layout.downloading_expansion);
+							mPB = (ProgressBar)findViewById(com.godot.game.R.id.progressBar);
+							mStatusText = (TextView)findViewById(com.godot.game.R.id.statusText);
+							mProgressFraction = (TextView)findViewById(com.godot.game.R.id.progressAsFraction);
+							mProgressPercent = (TextView)findViewById(com.godot.game.R.id.progressAsPercentage);
+							mAverageSpeed = (TextView)findViewById(com.godot.game.R.id.progressAverageSpeed);
+							mTimeRemaining = (TextView)findViewById(com.godot.game.R.id.progressTimeRemaining);
+							mDashboard = findViewById(com.godot.game.R.id.downloaderDashboard);
+							mCellMessage = findViewById(com.godot.game.R.id.approveCellular);
+							mPauseButton = (Button)findViewById(com.godot.game.R.id.pauseButton);
+							mWiFiSettingsButton = (Button)findViewById(com.godot.game.R.id.wifiSettingsButton);
+
+							return;
+						} else {
+							Log.d("GODOT", "NO DOWNLOAD REQUIRED");
+						}
 					} catch (NameNotFoundException e) {
 						// TODO Auto-generated catch block
 						Log.d("GODOT", "Error downloading expansion package:" + e.getMessage());
 					}
-
 				}
-
 			}
 		}
 
@@ -615,28 +594,26 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 
 		initializeGodot();
 
-
 		//instanceSingleton( new GodotFacebook(this) );
-
-
 	}
 
+	@Override
+	protected void onDestroy() {
 
-	@Override protected void onDestroy(){
-
-		if(mPaymentsManager != null ) mPaymentsManager.destroy();
-		for(int i=0;i<singleton_count;i++) {
+		if (mPaymentsManager != null) mPaymentsManager.destroy();
+		for (int i = 0; i < singleton_count; i++) {
 			singletons[i].onMainDestroy();
 		}
 		super.onDestroy();
 	}
 
-	@Override protected void onPause() {
+	@Override
+	protected void onPause() {
 		super.onPause();
-		if (!godot_initialized){
+		if (!godot_initialized) {
 			if (null != mDownloaderClientStub) {
-		        mDownloaderClientStub.disconnect(this);
-		    }
+				mDownloaderClientStub.disconnect(this);
+			}
 			return;
 		}
 		mView.onPause();
@@ -648,17 +625,18 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 		});
 		mSensorManager.unregisterListener(this);
 
-		for(int i=0;i<singleton_count;i++) {
+		for (int i = 0; i < singleton_count; i++) {
 			singletons[i].onMainPause();
 		}
 	}
 
-	@Override protected void onResume() {
+	@Override
+	protected void onResume() {
 		super.onResume();
-		if (!godot_initialized){
+		if (!godot_initialized) {
 			if (null != mDownloaderClientStub) {
-		        mDownloaderClientStub.connect(this);
-		    }
+				mDownloaderClientStub.connect(this);
+			}
 			return;
 		}
 
@@ -674,59 +652,58 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 		mSensorManager.registerListener(this, mMagnetometer, SensorManager.SENSOR_DELAY_GAME);
 		mSensorManager.registerListener(this, mGyroscope, SensorManager.SENSOR_DELAY_GAME);
 
-		if(use_immersive && Build.VERSION.SDK_INT >= 19.0){ // check if the application runs on an android 4.4+
+		if (use_immersive && Build.VERSION.SDK_INT >= 19.0) { // check if the application runs on an android 4.4+
 			Window window = getWindow();
 			window.getDecorView().setSystemUiVisibility(
-					    View.SYSTEM_UI_FLAG_LAYOUT_STABLE
-						    | View.SYSTEM_UI_FLAG_LAYOUT_HIDE_NAVIGATION
-						    | View.SYSTEM_UI_FLAG_LAYOUT_FULLSCREEN
-						    | View.SYSTEM_UI_FLAG_HIDE_NAVIGATION // hide nav bar
-						    | View.SYSTEM_UI_FLAG_FULLSCREEN // hide status bar
-						    | View.SYSTEM_UI_FLAG_IMMERSIVE_STICKY);
+					View.SYSTEM_UI_FLAG_LAYOUT_STABLE |
+					View.SYSTEM_UI_FLAG_LAYOUT_HIDE_NAVIGATION |
+					View.SYSTEM_UI_FLAG_LAYOUT_FULLSCREEN |
+					View.SYSTEM_UI_FLAG_HIDE_NAVIGATION | // hide nav bar
+					View.SYSTEM_UI_FLAG_FULLSCREEN | // hide status bar
+					View.SYSTEM_UI_FLAG_IMMERSIVE_STICKY);
 		}
 
-		for(int i=0;i<singleton_count;i++) {
+		for (int i = 0; i < singleton_count; i++) {
 
 			singletons[i].onMainResume();
 		}
-
-
-
 	}
 
 	public void UiChangeListener() {
 		final View decorView = getWindow().getDecorView();
-		decorView.setOnSystemUiVisibilityChangeListener (new View.OnSystemUiVisibilityChangeListener() {
+		decorView.setOnSystemUiVisibilityChangeListener(new View.OnSystemUiVisibilityChangeListener() {
 			@Override
 			public void onSystemUiVisibilityChange(int visibility) {
 				if ((visibility & View.SYSTEM_UI_FLAG_FULLSCREEN) == 0) {
 					decorView.setSystemUiVisibility(
-					View.SYSTEM_UI_FLAG_LAYOUT_STABLE
-					| View.SYSTEM_UI_FLAG_LAYOUT_HIDE_NAVIGATION
-					| View.SYSTEM_UI_FLAG_LAYOUT_FULLSCREEN
-					| View.SYSTEM_UI_FLAG_HIDE_NAVIGATION
-					| View.SYSTEM_UI_FLAG_FULLSCREEN
-					| View.SYSTEM_UI_FLAG_IMMERSIVE_STICKY);
+							View.SYSTEM_UI_FLAG_LAYOUT_STABLE |
+							View.SYSTEM_UI_FLAG_LAYOUT_HIDE_NAVIGATION |
+							View.SYSTEM_UI_FLAG_LAYOUT_FULLSCREEN |
+							View.SYSTEM_UI_FLAG_HIDE_NAVIGATION |
+							View.SYSTEM_UI_FLAG_FULLSCREEN |
+							View.SYSTEM_UI_FLAG_IMMERSIVE_STICKY);
 				}
 			}
 		});
 	}
 
-	@Override public void onSensorChanged(SensorEvent event) {
-		Display display = ((WindowManager) getSystemService(WINDOW_SERVICE)).getDefaultDisplay();
+	@Override
+	public void onSensorChanged(SensorEvent event) {
+		Display display = ((WindowManager)getSystemService(WINDOW_SERVICE)).getDefaultDisplay();
 		int displayRotation = display.getRotation();
 
 		float[] adjustedValues = new float[3];
 		final int axisSwap[][] = {
-		{  1,  -1,  0,  1  },     // ROTATION_0
-		{-1,  -1,  1,  0  },     // ROTATION_90
-		{-1,    1,  0,  1  },     // ROTATION_180
-		{  1,    1,  1,  0  }  }; // ROTATION_270
+			{ 1, -1, 0, 1 }, // ROTATION_0
+			{ -1, -1, 1, 0 }, // ROTATION_90
+			{ -1, 1, 0, 1 }, // ROTATION_180
+			{ 1, 1, 1, 0 } // ROTATION_270
+		}; // each row: { x sign, y sign, index of x in event.values, index of y in event.values }
 
 		final int[] as = axisSwap[displayRotation];
-		adjustedValues[0]  =  (float)as[0] * event.values[ as[2] ];
-		adjustedValues[1]  =  (float)as[1] * event.values[ as[3] ];
-		adjustedValues[2]  =  event.values[2];
+		adjustedValues[0] = (float)as[0] * event.values[as[2]];
+		adjustedValues[1] = (float)as[1] * event.values[as[3]];
+		adjustedValues[2] = event.values[2];
 
 		final float x = adjustedValues[0];
 		final float y = adjustedValues[1];
@@ -738,27 +715,28 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 				@Override
 				public void run() {
 					if (typeOfSensor == Sensor.TYPE_ACCELEROMETER) {
-						GodotLib.accelerometer(-x,y,-z);
+						GodotLib.accelerometer(-x, y, -z);
 					}
 					if (typeOfSensor == Sensor.TYPE_GRAVITY) {
-						GodotLib.gravity(-x,y,-z);
+						GodotLib.gravity(-x, y, -z);
 					}
 					if (typeOfSensor == Sensor.TYPE_MAGNETIC_FIELD) {
-						GodotLib.magnetometer(-x,y,-z);
+						GodotLib.magnetometer(-x, y, -z);
 					}
 					if (typeOfSensor == Sensor.TYPE_GYROSCOPE) {
-						GodotLib.gyroscope(x,-y,z);
+						GodotLib.gyroscope(x, -y, z);
 					}
 				}
 			});
 		}
 	}
 
-	@Override public final void onAccuracyChanged(Sensor sensor, int accuracy) {
+	@Override
+	public final void onAccuracyChanged(Sensor sensor, int accuracy) {
 		// Do something here if sensor accuracy changes.
 	}
 
-/*
+	/*
 	@Override public boolean dispatchKeyEvent(KeyEvent event) {
 
 		if (event.getKeyCode()==KeyEvent.KEYCODE_BACK) {
@@ -772,12 +750,13 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 
 		return false;
 	}
-*/
+	*/
 
-	@Override public void onBackPressed() {
+	@Override
+	public void onBackPressed() {
 		boolean shouldQuit = true;
 
-		for(int i=0;i<singleton_count;i++) {
+		for (int i = 0; i < singleton_count; i++) {
 			if (singletons[i].onMainBackPressed()) {
 				shouldQuit = false;
 			}
@@ -799,71 +778,68 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 		System.exit(0);
 	}
 
+	private boolean obbIsCorrupted(String f, String main_pack_md5) {
 
+		try {
 
-	private boolean obbIsCorrupted(String f, String main_pack_md5){
-
-		    try {
-
-			    InputStream fis =  new FileInputStream(f);
+			InputStream fis = new FileInputStream(f);
 
-			    // Create MD5 Hash
-			    byte[] buffer = new byte[16384];
-
-			    MessageDigest complete = MessageDigest.getInstance("MD5");
-			    int numRead;
-			    do {
-				    numRead = fis.read(buffer);
-				    if (numRead > 0) {
-					    complete.update(buffer, 0, numRead);
-				    }
-			    } while (numRead != -1);
+			// Create MD5 Hash
+			byte[] buffer = new byte[16384];
 
+			MessageDigest complete = MessageDigest.getInstance("MD5");
+			int numRead;
+			do {
+				numRead = fis.read(buffer);
+				if (numRead > 0) {
+					complete.update(buffer, 0, numRead);
+				}
+			} while (numRead != -1);
 
-			    fis.close();
-			    byte[] messageDigest = complete.digest();
+			fis.close();
+			byte[] messageDigest = complete.digest();
 
-			    // Create Hex String
-			    StringBuffer hexString = new StringBuffer();
-			    for (int i=0; i<messageDigest.length; i++) {
-				    String s = Integer.toHexString(0xFF & messageDigest[i]);
+			// Create Hex String
+			StringBuffer hexString = new StringBuffer();
+			for (int i = 0; i < messageDigest.length; i++) {
+				String s = Integer.toHexString(0xFF & messageDigest[i]);
 
-				    if (s.length()==1) {
-					s="0"+s;
-				    }
-				    hexString.append(s);
-			    }
-			    String md5str =  hexString.toString();
+				if (s.length() == 1) {
+					s = "0" + s;
+				}
+				hexString.append(s);
+			}
+			String md5str = hexString.toString();
 
-			    //Log.d("GODOT","**PACK** - My MD5: "+hexString+" - APK md5: "+main_pack_md5);
-			    if (!md5str.equals(main_pack_md5)) {
-				    Log.d("GODOT","**PACK MD5 MISMATCH???** - MD5 Found: "+md5str+" "+Integer.toString(md5str.length())+" - MD5 Expected: "+main_pack_md5+" "+Integer.toString(main_pack_md5.length()));
-				    return true;
-			    }
-			    return false;
-		    } catch (Exception e) {
-			    e.printStackTrace();
-			    Log.d("GODOT","**PACK FAIL**");
-			    return true;
-		    }
+			//Log.d("GODOT","**PACK** - My MD5: "+hexString+" - APK md5: "+main_pack_md5);
+			if (!md5str.equals(main_pack_md5)) {
+				Log.d("GODOT", "**PACK MD5 MISMATCH???** - MD5 Found: " + md5str + " " + Integer.toString(md5str.length()) + " - MD5 Expected: " + main_pack_md5 + " " + Integer.toString(main_pack_md5.length()));
+				return true;
+			}
+			return false;
+		} catch (Exception e) {
+			e.printStackTrace();
+			Log.d("GODOT", "**PACK FAIL**");
+			return true;
+		}
 	}
 
 	//@Override public boolean dispatchTouchEvent (MotionEvent event) {
 	public boolean gotTouchEvent(final MotionEvent event) {
 
 		super.onTouchEvent(event);
-		final int evcount=event.getPointerCount();
-		if (evcount==0)
+		final int evcount = event.getPointerCount();
+		if (evcount == 0)
 			return true;
 
 		if (mView != null) {
-			final int[] arr = new int[event.getPointerCount()*3];
+			final int[] arr = new int[event.getPointerCount() * 3];
 
-			for(int i=0;i<event.getPointerCount();i++) {
+			for (int i = 0; i < event.getPointerCount(); i++) {
 
-				arr[i*3+0]=(int)event.getPointerId(i);
-				arr[i*3+1]=(int)event.getX(i);
-				arr[i*3+2]=(int)event.getY(i);
+				arr[i * 3 + 0] = (int)event.getPointerId(i);
+				arr[i * 3 + 1] = (int)event.getX(i);
+				arr[i * 3 + 2] = (int)event.getY(i);
 			}
 
 			//System.out.printf("gaction: %d\n",event.getAction());
@@ -871,13 +847,13 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 			mView.queueEvent(new Runnable() {
 				@Override
 				public void run() {
-					switch(action) {
+					switch (action) {
 						case MotionEvent.ACTION_DOWN: {
-							GodotLib.touch(0,0,evcount,arr);
+							GodotLib.touch(0, 0, evcount, arr);
 							//System.out.printf("action down at: %f,%f\n", event.getX(),event.getY());
 						} break;
 						case MotionEvent.ACTION_MOVE: {
-							GodotLib.touch(1,0,evcount,arr);
+							GodotLib.touch(1, 0, evcount, arr);
 							/*
 							for(int i=0;i<event.getPointerCount();i++) {
 								System.out.printf("%d - moved to: %f,%f\n",i, event.getX(i),event.getY(i));
@@ -887,17 +863,17 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 						case MotionEvent.ACTION_POINTER_UP: {
 							final int indexPointUp = event.getActionIndex();
 							final int pointer_idx = event.getPointerId(indexPointUp);
-							GodotLib.touch(4,pointer_idx,evcount,arr);
+							GodotLib.touch(4, pointer_idx, evcount, arr);
 							//System.out.printf("%d - s.up at: %f,%f\n",pointer_idx, event.getX(pointer_idx),event.getY(pointer_idx));
 						} break;
 						case MotionEvent.ACTION_POINTER_DOWN: {
 							int pointer_idx = event.getActionIndex();
-							GodotLib.touch(3,pointer_idx,evcount,arr);
+							GodotLib.touch(3, pointer_idx, evcount, arr);
 							//System.out.printf("%d - s.down at: %f,%f\n",pointer_idx, event.getX(pointer_idx),event.getY(pointer_idx));
 						} break;
 						case MotionEvent.ACTION_CANCEL:
 						case MotionEvent.ACTION_UP: {
-							GodotLib.touch(2,0,evcount,arr);
+							GodotLib.touch(2, 0, evcount, arr);
 							/*
 							for(int i=0;i<event.getPointerCount();i++) {
 								System.out.printf("%d - up! %f,%f\n",i, event.getX(i),event.getY(i));
@@ -911,14 +887,16 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 		return true;
 	}
 
-	@Override public boolean onKeyMultiple(final int inKeyCode, int repeatCount, KeyEvent event) {
+	@Override
+	public boolean onKeyMultiple(final int inKeyCode, int repeatCount, KeyEvent event) {
 		String s = event.getCharacters();
 		if (s == null || s.length() == 0)
 			return super.onKeyMultiple(inKeyCode, repeatCount, event);
 
 		final char[] cc = s.toCharArray();
 		int cnt = 0;
-		for (int i = cc.length; --i >= 0; cnt += cc[i] != 0 ? 1 : 0);
+		for (int i = cc.length; --i >= 0; cnt += cc[i] != 0 ? 1 : 0)
+			;
 		if (cnt == 0) return super.onKeyMultiple(inKeyCode, repeatCount, event);
 		final Activity me = this;
 		queueEvent(new Runnable() {
@@ -939,7 +917,6 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 
 	private void queueEvent(Runnable runnable) {
 		// TODO Auto-generated method stub
-
 	}
 
 	public PaymentsManager getPaymentsManager() {
@@ -952,7 +929,6 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
 	}
 	*/
 
-
 	// Audio
 
 	/**
@@ -960,110 +936,106 @@ public class Godot extends Activity implements SensorEventListener, IDownloaderC
      * to show the state as being indeterminate at times. This sample can be
      * considered a guideline.
      */
-    @Override
-    public void onDownloadStateChanged(int newState) {
-    	Log.d("GODOT", "onDownloadStateChanged:" + newState);
-        setState(newState);
-        boolean showDashboard = true;
-        boolean showCellMessage = false;
-        boolean paused;
-        boolean indeterminate;
-        switch (newState) {
-            case IDownloaderClient.STATE_IDLE:
-            	Log.d("GODOT", "DOWNLOAD STATE IDLE");
-                // STATE_IDLE means the service is listening, so it's
-                // safe to start making calls via mRemoteService.
-                paused = false;
-                indeterminate = true;
-                break;
-            case IDownloaderClient.STATE_CONNECTING:
-            case IDownloaderClient.STATE_FETCHING_URL:
-            	Log.d("GODOT", "DOWNLOAD STATE CONNECTION / FETCHING URL");
-                showDashboard = true;
-                paused = false;
-                indeterminate = true;
-                break;
-            case IDownloaderClient.STATE_DOWNLOADING:
-            	Log.d("GODOT", "DOWNLOAD STATE DOWNLOADING");
-                paused = false;
-                showDashboard = true;
-                indeterminate = false;
-                break;
-
-            case IDownloaderClient.STATE_FAILED_CANCELED:
-            case IDownloaderClient.STATE_FAILED:
-            case IDownloaderClient.STATE_FAILED_FETCHING_URL:
-            case IDownloaderClient.STATE_FAILED_UNLICENSED:
-            	Log.d("GODOT", "DOWNLOAD STATE: FAILED, CANCELLED, UNLICENSED OR FAILED TO FETCH URL");
-                paused = true;
-                showDashboard = false;
-                indeterminate = false;
-                break;
-            case IDownloaderClient.STATE_PAUSED_NEED_CELLULAR_PERMISSION:
-            case IDownloaderClient.STATE_PAUSED_WIFI_DISABLED_NEED_CELLULAR_PERMISSION:
-            	Log.d("GODOT", "DOWNLOAD STATE: PAUSED BY MISSING CELLULAR PERMISSION");
-                showDashboard = false;
-                paused = true;
-                indeterminate = false;
-                showCellMessage = true;
-                break;
-
-            case IDownloaderClient.STATE_PAUSED_BY_REQUEST:
-            	Log.d("GODOT", "DOWNLOAD STATE: PAUSED BY USER");
-                paused = true;
-                indeterminate = false;
-                break;
-            case IDownloaderClient.STATE_PAUSED_ROAMING:
-            case IDownloaderClient.STATE_PAUSED_SDCARD_UNAVAILABLE:
-            	Log.d("GODOT", "DOWNLOAD STATE: PAUSED BY ROAMING OR SDCARD UNAVAILABLE");
-                paused = true;
-                indeterminate = false;
-                break;
-            case IDownloaderClient.STATE_COMPLETED:
-            	Log.d("GODOT", "DOWNLOAD STATE: COMPLETED");
-                showDashboard = false;
-                paused = false;
-                indeterminate = false;
-//                validateXAPKZipFiles();
-                initializeGodot();
-                return;
-            default:
-            	Log.d("GODOT", "DOWNLOAD STATE: DEFAULT");
-                paused = true;
-                indeterminate = true;
-                showDashboard = true;
-        }
-        int newDashboardVisibility = showDashboard ? View.VISIBLE : View.GONE;
-        if (mDashboard.getVisibility() != newDashboardVisibility) {
-            mDashboard.setVisibility(newDashboardVisibility);
-        }
-        int cellMessageVisibility = showCellMessage ? View.VISIBLE : View.GONE;
-        if (mCellMessage.getVisibility() != cellMessageVisibility) {
-            mCellMessage.setVisibility(cellMessageVisibility);
-        }
-
-        mPB.setIndeterminate(indeterminate);
-        setButtonPausedState(paused);
-    }
+	@Override
+	public void onDownloadStateChanged(int newState) {
+		Log.d("GODOT", "onDownloadStateChanged:" + newState);
+		setState(newState);
+		boolean showDashboard = true;
+		boolean showCellMessage = false;
+		boolean paused;
+		boolean indeterminate;
+		switch (newState) {
+			case IDownloaderClient.STATE_IDLE:
+				Log.d("GODOT", "DOWNLOAD STATE IDLE");
+				// STATE_IDLE means the service is listening, so it's
+				// safe to start making calls via mRemoteService.
+				paused = false;
+				indeterminate = true;
+				break;
+			case IDownloaderClient.STATE_CONNECTING:
+			case IDownloaderClient.STATE_FETCHING_URL:
+				Log.d("GODOT", "DOWNLOAD STATE CONNECTION / FETCHING URL");
+				showDashboard = true;
+				paused = false;
+				indeterminate = true;
+				break;
+			case IDownloaderClient.STATE_DOWNLOADING:
+				Log.d("GODOT", "DOWNLOAD STATE DOWNLOADING");
+				paused = false;
+				showDashboard = true;
+				indeterminate = false;
+				break;
+
+			case IDownloaderClient.STATE_FAILED_CANCELED:
+			case IDownloaderClient.STATE_FAILED:
+			case IDownloaderClient.STATE_FAILED_FETCHING_URL:
+			case IDownloaderClient.STATE_FAILED_UNLICENSED:
+				Log.d("GODOT", "DOWNLOAD STATE: FAILED, CANCELLED, UNLICENSED OR FAILED TO FETCH URL");
+				paused = true;
+				showDashboard = false;
+				indeterminate = false;
+				break;
+			case IDownloaderClient.STATE_PAUSED_NEED_CELLULAR_PERMISSION:
+			case IDownloaderClient.STATE_PAUSED_WIFI_DISABLED_NEED_CELLULAR_PERMISSION:
+				Log.d("GODOT", "DOWNLOAD STATE: PAUSED BY MISSING CELLULAR PERMISSION");
+				showDashboard = false;
+				paused = true;
+				indeterminate = false;
+				showCellMessage = true;
+				break;
+
+			case IDownloaderClient.STATE_PAUSED_BY_REQUEST:
+				Log.d("GODOT", "DOWNLOAD STATE: PAUSED BY USER");
+				paused = true;
+				indeterminate = false;
+				break;
+			case IDownloaderClient.STATE_PAUSED_ROAMING:
+			case IDownloaderClient.STATE_PAUSED_SDCARD_UNAVAILABLE:
+				Log.d("GODOT", "DOWNLOAD STATE: PAUSED BY ROAMING OR SDCARD UNAVAILABLE");
+				paused = true;
+				indeterminate = false;
+				break;
+			case IDownloaderClient.STATE_COMPLETED:
+				Log.d("GODOT", "DOWNLOAD STATE: COMPLETED");
+				showDashboard = false;
+				paused = false;
+				indeterminate = false;
+				//                validateXAPKZipFiles();
+				initializeGodot();
+				return;
+			default:
+				Log.d("GODOT", "DOWNLOAD STATE: DEFAULT");
+				paused = true;
+				indeterminate = true;
+				showDashboard = true;
+		}
+		int newDashboardVisibility = showDashboard ? View.VISIBLE : View.GONE;
+		if (mDashboard.getVisibility() != newDashboardVisibility) {
+			mDashboard.setVisibility(newDashboardVisibility);
+		}
+		int cellMessageVisibility = showCellMessage ? View.VISIBLE : View.GONE;
+		if (mCellMessage.getVisibility() != cellMessageVisibility) {
+			mCellMessage.setVisibility(cellMessageVisibility);
+		}
 
+		mPB.setIndeterminate(indeterminate);
+		setButtonPausedState(paused);
+	}
 
 	@Override
 	public void onDownloadProgress(DownloadProgressInfo progress) {
 		mAverageSpeed.setText(getString(com.godot.game.R.string.kilobytes_per_second,
-                Helpers.getSpeedString(progress.mCurrentSpeed)));
-        mTimeRemaining.setText(getString(com.godot.game.R.string.time_remaining,
-                Helpers.getTimeRemaining(progress.mTimeRemaining)));
-
-        progress.mOverallTotal = progress.mOverallTotal;
-        mPB.setMax((int) (progress.mOverallTotal >> 8));
-        mPB.setProgress((int) (progress.mOverallProgress >> 8));
-        mProgressPercent.setText(Long.toString(progress.mOverallProgress
-                * 100 /
-                progress.mOverallTotal) + "%");
-        mProgressFraction.setText(Helpers.getDownloadProgressString
-                (progress.mOverallProgress,
-                        progress.mOverallTotal));
-
+				Helpers.getSpeedString(progress.mCurrentSpeed)));
+		mTimeRemaining.setText(getString(com.godot.game.R.string.time_remaining,
+				Helpers.getTimeRemaining(progress.mTimeRemaining)));
+
+		// Byte counts are scaled down by 256 (>> 8) before the cast to int.
+		mPB.setMax((int)(progress.mOverallTotal >> 8));
+		mPB.setProgress((int)(progress.mOverallProgress >> 8));
+		// Show 0% instead of throwing ArithmeticException while the total is still 0.
+		long percent = progress.mOverallTotal > 0 ? progress.mOverallProgress * 100 / progress.mOverallTotal : 0;
+		mProgressPercent.setText(Long.toString(percent) + "%");
+		mProgressFraction.setText(Helpers.getDownloadProgressString(progress.mOverallProgress,
+				progress.mOverallTotal));
 	}
-
 }
diff --git a/platform/android/java/src/org/godotengine/godot/GodotDownloaderAlarmReceiver.java b/platform/android/java/src/org/godotengine/godot/GodotDownloaderAlarmReceiver.java
index 2c668dd586..568c7a4140 100644
--- a/platform/android/java/src/org/godotengine/godot/GodotDownloaderAlarmReceiver.java
+++ b/platform/android/java/src/org/godotengine/godot/GodotDownloaderAlarmReceiver.java
@@ -46,14 +46,14 @@ import android.util.Log;
  */
 public class GodotDownloaderAlarmReceiver extends BroadcastReceiver {
 
-    @Override
-    public void onReceive(Context context, Intent intent) {
-    	Log.d("GODOT", "Alarma recivida");
-	try {
-	    DownloaderClientMarshaller.startDownloadServiceIfRequired(context, intent, GodotDownloaderService.class);
-	} catch (NameNotFoundException e) {
-	    e.printStackTrace();
-	    Log.d("GODOT", "Exception: " + e.getClass().getName() + ":" + e.getMessage());
+	@Override
+	public void onReceive(Context context, Intent intent) {
+		Log.d("GODOT", "Alarma recivida");
+		try {
+			DownloaderClientMarshaller.startDownloadServiceIfRequired(context, intent, GodotDownloaderService.class);
+		} catch (NameNotFoundException e) {
+			e.printStackTrace();
+			Log.d("GODOT", "Exception: " + e.getClass().getName() + ":" + e.getMessage());
+		}
 	}
-    }
 }
diff --git a/platform/android/java/src/org/godotengine/godot/GodotDownloaderService.java b/platform/android/java/src/org/godotengine/godot/GodotDownloaderService.java
index 97ba7826fb..b8b3b925c5 100644
--- a/platform/android/java/src/org/godotengine/godot/GodotDownloaderService.java
+++ b/platform/android/java/src/org/godotengine/godot/GodotDownloaderService.java
@@ -40,46 +40,45 @@ import com.google.android.vending.expansion.downloader.impl.DownloaderService;
  * DownloaderService from the Downloader library.
  */
 public class GodotDownloaderService extends DownloaderService {
-    // stuff for LVL -- MODIFY FOR YOUR APPLICATION!
-    private static final String BASE64_PUBLIC_KEY = "REPLACE THIS WITH YOUR PUBLIC KEY";
-    // used by the preference obfuscater
-    private static final byte[] SALT = new byte[] {
-	    1, 43, -12, -1, 54, 98,
-	    -100, -12, 43, 2, -8, -4, 9, 5, -106, -108, -33, 45, -1, 84
-    };
+	// stuff for LVL -- MODIFY FOR YOUR APPLICATION!
+	private static final String BASE64_PUBLIC_KEY = "REPLACE THIS WITH YOUR PUBLIC KEY";
+	// used by the preference obfuscator
+	private static final byte[] SALT = new byte[] {
+		1, 43, -12, -1, 54, 98,
+		-100, -12, 43, 2, -8, -4, 9, 5, -106, -108, -33, 45, -1, 84
+	};
 
-    /**
+	/**
      * This public key comes from your Android Market publisher account, and it
      * used by the LVL to validate responses from Market on your behalf.
      */
-    @Override
-    public String getPublicKey() {
-    	SharedPreferences prefs = getApplicationContext().getSharedPreferences("app_data_keys", Context.MODE_PRIVATE);
-    	Log.d("GODOT", "getting public key:" + prefs.getString("store_public_key", null));
-    	return prefs.getString("store_public_key", null);
-		
-	//return BASE64_PUBLIC_KEY;
-    }
+	@Override
+	public String getPublicKey() {
+		SharedPreferences prefs = getApplicationContext().getSharedPreferences("app_data_keys", Context.MODE_PRIVATE);
+		Log.d("GODOT", "getting public key:" + prefs.getString("store_public_key", null));
+		return prefs.getString("store_public_key", null);
 
-    /**
+		//return BASE64_PUBLIC_KEY;
+	}
+
+	/**
      * This is used by the preference obfuscater to make sure that your
      * obfuscated preferences are different than the ones used by other
      * applications.
      */
-    @Override
-    public byte[] getSALT() {
-	return SALT;
-    }
+	@Override
+	public byte[] getSALT() {
+		return SALT;
+	}
 
-    /**
+	/**
      * Fill this in with the class name for your alarm receiver. We do this
      * because receivers must be unique across all of Android (it's a good idea
      * to make sure that your receiver is in your unique package)
      */
-    @Override
-    public String getAlarmReceiverClassName() {
-    	Log.d("GODOT", "getAlarmReceiverClassName()");
-    	return GodotDownloaderAlarmReceiver.class.getName();
-    }
-
+	@Override
+	public String getAlarmReceiverClassName() {
+		Log.d("GODOT", "getAlarmReceiverClassName()");
+		return GodotDownloaderAlarmReceiver.class.getName();
+	}
 }
diff --git a/platform/android/java/src/org/godotengine/godot/GodotIO.java b/platform/android/java/src/org/godotengine/godot/GodotIO.java
index 989fd2b609..12a2467a29 100644
--- a/platform/android/java/src/org/godotengine/godot/GodotIO.java
+++ b/platform/android/java/src/org/godotengine/godot/GodotIO.java
@@ -56,7 +56,6 @@ import org.godotengine.godot.input.*;
 
 public class GodotIO {
 
-
 	AssetManager am;
 	Godot activity;
 	GodotEditText edit;
@@ -64,35 +63,32 @@ public class GodotIO {
 	Context applicationContext;
 	MediaPlayer mediaPlayer;
 
-	final int SCREEN_LANDSCAPE=0;
-	final int SCREEN_PORTRAIT=1;
-	final int SCREEN_REVERSE_LANDSCAPE=2;
-	final int SCREEN_REVERSE_PORTRAIT=3;
-	final int SCREEN_SENSOR_LANDSCAPE=4;
-	final int SCREEN_SENSOR_PORTRAIT=5;
-	final int SCREEN_SENSOR=6;
+	final int SCREEN_LANDSCAPE = 0;
+	final int SCREEN_PORTRAIT = 1;
+	final int SCREEN_REVERSE_LANDSCAPE = 2;
+	final int SCREEN_REVERSE_PORTRAIT = 3;
+	final int SCREEN_SENSOR_LANDSCAPE = 4;
+	final int SCREEN_SENSOR_PORTRAIT = 5;
+	final int SCREEN_SENSOR = 6;
 
 	/////////////////////////
 	/// FILES
 	/////////////////////////
 
-	public int last_file_id=1;
+	public int last_file_id = 1;
 
 	class AssetData {
 
-
-		public boolean eof=false;
+		public boolean eof = false;
 		public String path;
 		public InputStream is;
 		public int len;
 		public int pos;
 	}
 
+	HashMap<Integer, AssetData> streams;
 
-	HashMap<Integer,AssetData> streams;
-
-
-	public int file_open(String path,boolean write) {
+	public int file_open(String path, boolean write) {
 
 		//System.out.printf("file_open: Attempt to Open %s\n",path);
 
@@ -100,7 +96,6 @@ public class GodotIO {
 		if (write)
 			return -1;
 
-
 		AssetData ad = new AssetData();
 
 		try {
@@ -113,76 +108,73 @@ public class GodotIO {
 		}
 
 		try {
-			ad.len=ad.is.available();
+			ad.len = ad.is.available();
 		} catch (Exception e) {
 
-			System.out.printf("Exception availabling on file_open: %s\n",path);
+			System.out.printf("Exception availabling on file_open: %s\n", path);
 			return -1;
 		}
 
-		ad.path=path;
-		ad.pos=0;
+		ad.path = path;
+		ad.pos = 0;
 		++last_file_id;
-		streams.put(last_file_id,ad);
+		streams.put(last_file_id, ad);
 
 		return last_file_id;
 	}
 	public int file_get_size(int id) {
 
 		if (!streams.containsKey(id)) {
-			System.out.printf("file_get_size: Invalid file id: %d\n",id);
+			System.out.printf("file_get_size: Invalid file id: %d\n", id);
 			return -1;
 		}
 
 		return streams.get(id).len;
-
 	}
-	public void file_seek(int id,int bytes) {
+	public void file_seek(int id, int bytes) {
 
 		if (!streams.containsKey(id)) {
-			System.out.printf("file_get_size: Invalid file id: %d\n",id);
+			System.out.printf("file_seek: Invalid file id: %d\n", id);
 			return;
 		}
 		//seek sucks
 		AssetData ad = streams.get(id);
-		if (bytes>ad.len)
-			bytes=ad.len;
-		if (bytes<0)
-			bytes=0;
+		// Clamp the target offset into [0, ad.len]. Seeking is emulated: a
+		// forward seek skips bytes on the open stream, a backward seek
+		// reopens the asset and skips from its start.
+		bytes = Math.max(0, Math.min(bytes, ad.len));
 
 		try {
 
-		if (bytes > (int)ad.pos) {
-			int todo=bytes-(int)ad.pos;
-			while(todo>0) {
-				todo-=ad.is.skip(todo);
-			}
-			ad.pos=bytes;
-		} else if (bytes<(int)ad.pos) {
+			if (bytes > (int)ad.pos) {
+				int todo = bytes - (int)ad.pos;
+				while (todo > 0) {
+					todo -= ad.is.skip(todo);
+				}
+				ad.pos = bytes;
+			} else if (bytes < (int)ad.pos) {
 
-			ad.is=am.open(ad.path);
+				ad.is = am.open(ad.path);
 
-			ad.pos=bytes;
-			int todo=bytes;
-			while(todo>0) {
-				todo-=ad.is.skip(todo);
+				ad.pos = bytes;
+				int todo = bytes;
+				while (todo > 0) {
+					todo -= ad.is.skip(todo);
+				}
 			}
-		}
 
-		ad.eof=false;
+			ad.eof = false;
 		} catch (IOException e) {
 
-			System.out.printf("Exception on file_seek: %s\n",e);
+			System.out.printf("Exception on file_seek: %s\n", e);
 			return;
 		}
-
-
 	}
 
 	public int file_tell(int id) {
 
 		if (!streams.containsKey(id)) {
-			System.out.printf("file_read: Can't tell eof for invalid file id: %d\n",id);
+			System.out.printf("file_tell: Can't tell position for invalid file id: %d\n", id);
 			return 0;
 		}
 
@@ -192,7 +184,7 @@ public class GodotIO {
 	public boolean file_eof(int id) {
 
 		if (!streams.containsKey(id)) {
-			System.out.printf("file_read: Can't check eof for invalid file id: %d\n",id);
+			System.out.printf("file_eof: Can't check eof for invalid file id: %d\n", id);
 			return false;
 		}
 
@@ -203,73 +195,65 @@ public class GodotIO {
 	public byte[] file_read(int id, int bytes) {
 
 		if (!streams.containsKey(id)) {
-			System.out.printf("file_read: Can't read invalid file id: %d\n",id);
+			System.out.printf("file_read: Can't read invalid file id: %d\n", id);
 			return new byte[0];
 		}
 
-
 		AssetData ad = streams.get(id);
 
 		if (ad.pos + bytes > ad.len) {
 
-			bytes=ad.len-ad.pos;
-			ad.eof=true;
+			bytes = ad.len - ad.pos;
+			ad.eof = true;
 		}
 
-
-		if (bytes==0) {
+		if (bytes == 0) {
 
 			return new byte[0];
 		}
 
-
-
-		byte[] buf1=new byte[bytes];
-		int r=0;
+		byte[] buf1 = new byte[bytes];
+		int r = 0;
 		try {
 			r = ad.is.read(buf1);
 		} catch (IOException e) {
 
-			System.out.printf("Exception on file_read: %s\n",e);
+			System.out.printf("Exception on file_read: %s\n", e);
 			return new byte[bytes];
 		}
 
-		if (r==0) {
+		if (r == 0) {
 			return new byte[0];
 		}
 
-		ad.pos+=r;
+		ad.pos += r;
 
-		if (r<bytes) {
+		if (r < bytes) {
 
-			byte[] buf2=new byte[r];
-			for(int i=0;i<r;i++)
-				buf2[i]=buf1[i];
+			byte[] buf2 = new byte[r];
+			for (int i = 0; i < r; i++)
+				buf2[i] = buf1[i];
 			return buf2;
 		} else {
 
 			return buf1;
 		}
-
 	}
 
 	public void file_close(int id) {
 
 		if (!streams.containsKey(id)) {
-			System.out.printf("file_close: Can't close invalid file id: %d\n",id);
+			System.out.printf("file_close: Can't close invalid file id: %d\n", id);
 			return;
 		}
 
 		streams.remove(id);
-
 	}
 
-
 	/////////////////////////
 	/// DIRECTORIES
 	/////////////////////////
 
-
 	class AssetDir {
 
 		public String[] files;
@@ -277,49 +261,48 @@ public class GodotIO {
 		public String path;
 	}
 
-	public int last_dir_id=1;
+	public int last_dir_id = 1;
 
-	HashMap<Integer,AssetDir> dirs;
+	HashMap<Integer, AssetDir> dirs;
 
 	public int dir_open(String path) {
 
 		AssetDir ad = new AssetDir();
-		ad.current=0;
-		ad.path=path;
+		ad.current = 0;
+		ad.path = path;
 
 		try {
 			ad.files = am.list(path);
 			// no way to find path is directory or file exactly.
 			// but if ad.files.length==0, then it's an empty directory or file.
-			if (ad.files.length==0) {
+			if (ad.files.length == 0) {
 				return -1;
 			}
 		} catch (IOException e) {
 
-			System.out.printf("Exception on dir_open: %s\n",e);
+			System.out.printf("Exception on dir_open: %s\n", e);
 			return -1;
 		}
 
 		//System.out.printf("Opened dir: %s\n",path);
 		++last_dir_id;
-		dirs.put(last_dir_id,ad);
+		dirs.put(last_dir_id, ad);
 
 		return last_dir_id;
-
 	}
 
 	public boolean dir_is_dir(int id) {
 		if (!dirs.containsKey(id)) {
-			System.out.printf("dir_next: invalid dir id: %d\n",id);
+			System.out.printf("dir_next: invalid dir id: %d\n", id);
 			return false;
 		}
 		AssetDir ad = dirs.get(id);
 		//System.out.printf("go next: %d,%d\n",ad.current,ad.files.length);
 		int idx = ad.current;
-		if (idx>0)
+		if (idx > 0)
 			idx--;
 
-		if (idx>=ad.files.length)
+		if (idx >= ad.files.length)
 			return false;
 		String fname = ad.files[idx];
 
@@ -327,7 +310,7 @@ public class GodotIO {
 			if (ad.path.equals(""))
 				am.open(fname);
 			else
-				am.open(ad.path+"/"+fname);
+				am.open(ad.path + "/" + fname);
 			return false;
 		} catch (Exception e) {
 			return true;
@@ -337,46 +320,41 @@ public class GodotIO {
 	public String dir_next(int id) {
 
 		if (!dirs.containsKey(id)) {
-			System.out.printf("dir_next: invalid dir id: %d\n",id);
+			System.out.printf("dir_next: invalid dir id: %d\n", id);
 			return "";
 		}
 
 		AssetDir ad = dirs.get(id);
 		//System.out.printf("go next: %d,%d\n",ad.current,ad.files.length);
 
-		if (ad.current>=ad.files.length) {
+		if (ad.current >= ad.files.length) {
 			ad.current++;
 			return "";
 		}
 		String r = ad.files[ad.current];
 		ad.current++;
 		return r;
-
 	}
 
 	public void dir_close(int id) {
 
 		if (!dirs.containsKey(id)) {
-			System.out.printf("dir_close: invalid dir id: %d\n",id);
+			System.out.printf("dir_close: invalid dir id: %d\n", id);
 			return;
 		}
 
 		dirs.remove(id);
 	}
 
-
-
 	GodotIO(Godot p_activity) {
 
-		am=p_activity.getAssets();
-		activity=p_activity;
-		streams=new HashMap<Integer,AssetData>();
-		dirs=new HashMap<Integer,AssetDir>();
+		am = p_activity.getAssets();
+		activity = p_activity;
+		streams = new HashMap<Integer, AssetData>();
+		dirs = new HashMap<Integer, AssetDir>();
 		applicationContext = activity.getApplicationContext();
-
 	}
 
-
 	/////////////////////////
 	// AUDIO
 	/////////////////////////
@@ -400,7 +378,7 @@ public class GodotIO {
 		desiredFrames = Math.max(desiredFrames, (AudioTrack.getMinBufferSize(sampleRate, channelConfig, audioFormat) + frameSize - 1) / frameSize);
 
 		mAudioTrack = new AudioTrack(AudioManager.STREAM_MUSIC, sampleRate,
-			channelConfig, audioFormat, desiredFrames * frameSize, AudioTrack.MODE_STREAM);
+				channelConfig, audioFormat, desiredFrames * frameSize, AudioTrack.MODE_STREAM);
 
 		audioStartThread();
 
@@ -412,10 +390,10 @@ public class GodotIO {
 
 	public void audioStartThread() {
 		mAudioThread = new Thread(new Runnable() {
-		    public void run() {
-			mAudioTrack.play();
-			GodotLib.audio();
-		}
+			public void run() {
+				mAudioTrack.play();
+				GodotLib.audio();
+			}
 		});
 
 		// I'd take REALTIME if I could get it!
@@ -424,15 +402,15 @@ public class GodotIO {
 	}
 
 	public void audioWriteShortBuffer(short[] buffer) {
-		for (int i = 0; i < buffer.length; ) {
+		for (int i = 0; i < buffer.length;) {
 			int result = mAudioTrack.write(buffer, i, buffer.length - i);
 			if (result > 0) {
 				i += result;
 			} else if (result == 0) {
 				try {
-				    Thread.sleep(1);
-				} catch(InterruptedException e) {
-				    // Nom nom
+					Thread.sleep(1);
+				} catch (InterruptedException e) {
+					// Nom nom
 				}
 			} else {
 				Log.w("Godot", "Godot audio: error return from write(short)");
@@ -441,18 +419,16 @@ public class GodotIO {
 		}
 	}
 
-
-
 	public void audioQuit() {
 		if (mAudioThread != null) {
 			try {
 				mAudioThread.join();
-			} catch(Exception e) {
+			} catch (Exception e) {
 				Log.v("Godot", "Problem stopping audio thread: " + e);
 			}
 			mAudioThread = null;
 
-		//Log.v("Godot", "Finished waiting for audio thread");
+			//Log.v("Godot", "Finished waiting for audio thread");
 		}
 
 		if (mAudioTrack != null) {
@@ -473,20 +449,18 @@ public class GodotIO {
 	// MISCELLANEOUS OS IO
 	/////////////////////////
 
-
-
 	public int openURI(String p_uri) {
 
 		try {
 			Log.v("MyApp", "TRYING TO OPEN URI: " + p_uri);
 			String path = p_uri;
-			String type="";
+			String type = "";
 			if (path.startsWith("/")) {
 				//absolute path to filesystem, prepend file://
-				path="file://"+path;
+				path = "file://" + path;
 				if (p_uri.endsWith(".png") || p_uri.endsWith(".jpg") || p_uri.endsWith(".gif") || p_uri.endsWith(".webp")) {
 
-					type="image/*";
+					type = "image/*";
 				}
 			}
 
@@ -531,7 +505,7 @@ public class GodotIO {
 	}
 
 	public void showKeyboard(String p_existing_text) {
-		if(edit != null)
+		if (edit != null)
 			edit.showKeyboard(p_existing_text);
 
 		//InputMethodManager inputMgr = (InputMethodManager)activity.getSystemService(Context.INPUT_METHOD_SERVICE);
@@ -539,21 +513,21 @@ public class GodotIO {
 	};
 
 	public void hideKeyboard() {
-		if(edit != null)
+		if (edit != null)
 			edit.hideKeyboard();
 
-        InputMethodManager inputMgr = (InputMethodManager)activity.getSystemService(Context.INPUT_METHOD_SERVICE);
-        View v = activity.getCurrentFocus();
-        if (v != null) {
-            inputMgr.hideSoftInputFromWindow(v.getWindowToken(), InputMethodManager.HIDE_NOT_ALWAYS);
-        } else {
-            inputMgr.hideSoftInputFromWindow(new View(activity).getWindowToken(), InputMethodManager.HIDE_NOT_ALWAYS);
-        }
+		InputMethodManager inputMgr = (InputMethodManager)activity.getSystemService(Context.INPUT_METHOD_SERVICE);
+		View v = activity.getCurrentFocus();
+		if (v != null) {
+			inputMgr.hideSoftInputFromWindow(v.getWindowToken(), InputMethodManager.HIDE_NOT_ALWAYS);
+		} else {
+			inputMgr.hideSoftInputFromWindow(new View(activity).getWindowToken(), InputMethodManager.HIDE_NOT_ALWAYS);
+		}
 	};
 
 	public void setScreenOrientation(int p_orientation) {
 
-		switch(p_orientation) {
+		switch (p_orientation) {
 
 			case SCREEN_LANDSCAPE: {
 				activity.setRequestedOrientation(ActivityInfo.SCREEN_ORIENTATION_LANDSCAPE);
@@ -576,16 +550,14 @@ public class GodotIO {
 			case SCREEN_SENSOR: {
 				activity.setRequestedOrientation(ActivityInfo.SCREEN_ORIENTATION_FULL_SENSOR);
 			} break;
-
 		}
 	};
-	
+
 	public void setEdit(GodotEditText _edit) {
 		edit = _edit;
 	}
 
-	public void playVideo(String p_path)
-	{
+	public void playVideo(String p_path) {
 		Uri filePath = Uri.parse(p_path);
 		mediaPlayer = new MediaPlayer();
 
@@ -594,11 +566,9 @@ public class GodotIO {
 			mediaPlayer.setDataSource(applicationContext, filePath);
 			mediaPlayer.prepare();
 			mediaPlayer.start();
+		} catch (IOException e) {
+			System.out.println("IOError while playing video");
 		}
-		catch(IOException e)
-        {
-            System.out.println("IOError while playing video");
-        }
 	}
 
 	public boolean isVideoPlaying() {
@@ -621,49 +591,47 @@ public class GodotIO {
 		}
 	}
 
-
-	public static final int SYSTEM_DIR_DESKTOP=0;
-	public static final int SYSTEM_DIR_DCIM=1;
-	public static final int SYSTEM_DIR_DOCUMENTS=2;
-	public static final int SYSTEM_DIR_DOWNLOADS=3;
-	public static final int SYSTEM_DIR_MOVIES=4;
-	public static final int SYSTEM_DIR_MUSIC=5;
-	public static final int SYSTEM_DIR_PICTURES=6;
-	public static final int SYSTEM_DIR_RINGTONES=7;
-
+	public static final int SYSTEM_DIR_DESKTOP = 0;
+	public static final int SYSTEM_DIR_DCIM = 1;
+	public static final int SYSTEM_DIR_DOCUMENTS = 2;
+	public static final int SYSTEM_DIR_DOWNLOADS = 3;
+	public static final int SYSTEM_DIR_MOVIES = 4;
+	public static final int SYSTEM_DIR_MUSIC = 5;
+	public static final int SYSTEM_DIR_PICTURES = 6;
+	public static final int SYSTEM_DIR_RINGTONES = 7;
 
 	public String getSystemDir(int idx) {
 
-		String what="";
-		switch(idx) {
+		String what = "";
+		switch (idx) {
 			case SYSTEM_DIR_DESKTOP: {
 				//what=Environment.DIRECTORY_DOCUMENTS;
-				what=Environment.DIRECTORY_DOWNLOADS;
+				what = Environment.DIRECTORY_DOWNLOADS;
 			} break;
 			case SYSTEM_DIR_DCIM: {
-				what=Environment.DIRECTORY_DCIM;
+				what = Environment.DIRECTORY_DCIM;
 
 			} break;
 			case SYSTEM_DIR_DOCUMENTS: {
-				what=Environment.DIRECTORY_DOWNLOADS;
+				what = Environment.DIRECTORY_DOWNLOADS;
 				//what=Environment.DIRECTORY_DOCUMENTS;
 			} break;
 			case SYSTEM_DIR_DOWNLOADS: {
-				what=Environment.DIRECTORY_DOWNLOADS;
+				what = Environment.DIRECTORY_DOWNLOADS;
 
 			} break;
 			case SYSTEM_DIR_MOVIES: {
-				what=Environment.DIRECTORY_MOVIES;
+				what = Environment.DIRECTORY_MOVIES;
 
 			} break;
 			case SYSTEM_DIR_MUSIC: {
-				what=Environment.DIRECTORY_MUSIC;
+				what = Environment.DIRECTORY_MUSIC;
 			} break;
 			case SYSTEM_DIR_PICTURES: {
-				what=Environment.DIRECTORY_PICTURES;
+				what = Environment.DIRECTORY_PICTURES;
 			} break;
 			case SYSTEM_DIR_RINGTONES: {
-				what=Environment.DIRECTORY_RINGTONES;
+				what = Environment.DIRECTORY_RINGTONES;
 
 			} break;
 		}
@@ -676,10 +644,9 @@ public class GodotIO {
 	protected static final String PREFS_FILE = "device_id.xml";
 	protected static final String PREFS_DEVICE_ID = "device_id";
 
-	public static String unique_id="";
+	public static String unique_id = "";
 	public String getUniqueID() {
 
-		return  unique_id;
+		return unique_id;
 	}
-
 }
diff --git a/platform/android/java/src/org/godotengine/godot/GodotLib.java b/platform/android/java/src/org/godotengine/godot/GodotLib.java
index 6b84ad6555..873d30eb34 100644
--- a/platform/android/java/src/org/godotengine/godot/GodotLib.java
+++ b/platform/android/java/src/org/godotengine/godot/GodotLib.java
@@ -33,43 +33,41 @@ package org.godotengine.godot;
 
 public class GodotLib {
 
+	public static GodotIO io;
 
-     public static GodotIO io;
+	static {
+		System.loadLibrary("godot_android");
+	}
 
-     static {
-       System.loadLibrary("godot_android");
-     }
-
-    /**
+	/**
      * @param width the current view width
      * @param height the current view height
      */
 
-     public static native void initialize(Godot p_instance,boolean need_reload_hook,Object p_asset_manager, boolean use_apk_expansion);
-		 public static native void setup(String[] p_cmdline);
-     public static native void resize(int width, int height,boolean reload);
-     public static native void newcontext(boolean p_32_bits);
-     public static native void back();
-     public static native void step();
-     public static native void touch(int what,int pointer,int howmany, int[] arr);
-     public static native void accelerometer(float x, float y, float z);
-     public static native void gravity(float x, float y, float z);
-     public static native void magnetometer(float x, float y, float z);
-     public static native void gyroscope(float x, float y, float z);
-	 public static native void key(int p_scancode, int p_unicode_char, boolean p_pressed);
-	 public static native void joybutton(int p_device, int p_but, boolean p_pressed);
-	 public static native void joyaxis(int p_device, int p_axis, float p_value);
-	 public static native void joyhat(int p_device, int p_hat_x, int p_hat_y);
-	 public static native void joyconnectionchanged(int p_device, boolean p_connected, String p_name);
-     public static native void focusin();
-     public static native void focusout();
-     public static native void audio();
-     public static native void singleton(String p_name,Object p_object);
-     public static native void method(String p_sname,String p_name,String p_ret,String[] p_params);
-     public static native String getGlobal(String p_key);
+	public static native void initialize(Godot p_instance, boolean need_reload_hook, Object p_asset_manager, boolean use_apk_expansion);
+	public static native void setup(String[] p_cmdline);
+	public static native void resize(int width, int height, boolean reload);
+	public static native void newcontext(boolean p_32_bits);
+	public static native void back();
+	public static native void step();
+	public static native void touch(int what, int pointer, int howmany, int[] arr);
+	public static native void accelerometer(float x, float y, float z);
+	public static native void gravity(float x, float y, float z);
+	public static native void magnetometer(float x, float y, float z);
+	public static native void gyroscope(float x, float y, float z);
+	public static native void key(int p_scancode, int p_unicode_char, boolean p_pressed);
+	public static native void joybutton(int p_device, int p_but, boolean p_pressed);
+	public static native void joyaxis(int p_device, int p_axis, float p_value);
+	public static native void joyhat(int p_device, int p_hat_x, int p_hat_y);
+	public static native void joyconnectionchanged(int p_device, boolean p_connected, String p_name);
+	public static native void focusin();
+	public static native void focusout();
+	public static native void audio();
+	public static native void singleton(String p_name, Object p_object);
+	public static native void method(String p_sname, String p_name, String p_ret, String[] p_params);
+	public static native String getGlobal(String p_key);
 	public static native void callobject(int p_ID, String p_method, Object[] p_params);
 	public static native void calldeferred(int p_ID, String p_method, Object[] p_params);
 
 	public static native void setVirtualKeyboardHeight(int p_height);
-
 }
diff --git a/platform/android/java/src/org/godotengine/godot/GodotPaymentV3.java b/platform/android/java/src/org/godotengine/godot/GodotPaymentV3.java
index 8fe79fdfc7..61d10ed9e4 100644
--- a/platform/android/java/src/org/godotengine/godot/GodotPaymentV3.java
+++ b/platform/android/java/src/org/godotengine/godot/GodotPaymentV3.java
@@ -40,7 +40,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
-
 public class GodotPaymentV3 extends Godot.SingletonBase {
 
 	private Godot activity;
@@ -67,8 +66,8 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 
 	public GodotPaymentV3(Activity p_activity) {
 
-		registerClass("GodotPayments", new String[]{"purchase", "setPurchaseCallbackId", "setPurchaseValidationUrlPrefix", "setTransactionId", "getSignature", "consumeUnconsumedPurchases", "requestPurchased", "setAutoConsume", "consume", "querySkuDetails"});
-		activity = (Godot) p_activity;
+		registerClass("GodotPayments", new String[] { "purchase", "setPurchaseCallbackId", "setPurchaseValidationUrlPrefix", "setTransactionId", "getSignature", "consumeUnconsumedPurchases", "requestPurchased", "setAutoConsume", "consume", "querySkuDetails" });
+		activity = (Godot)p_activity;
 		mPaymentManager = activity.getPaymentsManager();
 		mPaymentManager.setBaseSingleton(this);
 	}
@@ -89,32 +88,32 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 	}
 
 	public void callbackSuccess(String ticket, String signature, String sku) {
-		GodotLib.calldeferred(purchaseCallbackId, "purchase_success", new Object[]{ticket, signature, sku});
+		GodotLib.calldeferred(purchaseCallbackId, "purchase_success", new Object[] { ticket, signature, sku });
 	}
 
 	public void callbackSuccessProductMassConsumed(String ticket, String signature, String sku) {
 		Log.d(this.getClass().getName(), "callbackSuccessProductMassConsumed > " + ticket + "," + signature + "," + sku);
-		GodotLib.calldeferred(purchaseCallbackId, "consume_success", new Object[]{ticket, signature, sku});
+		GodotLib.calldeferred(purchaseCallbackId, "consume_success", new Object[] { ticket, signature, sku });
 	}
 
 	public void callbackSuccessNoUnconsumedPurchases() {
-		GodotLib.calldeferred(purchaseCallbackId, "consume_not_required", new Object[]{});
+		GodotLib.calldeferred(purchaseCallbackId, "consume_not_required", new Object[] {});
 	}
 
 	public void callbackFailConsume() {
-		GodotLib.calldeferred(purchaseCallbackId, "consume_fail", new Object[]{});
+		GodotLib.calldeferred(purchaseCallbackId, "consume_fail", new Object[] {});
 	}
 
 	public void callbackFail() {
-		GodotLib.calldeferred(purchaseCallbackId, "purchase_fail", new Object[]{});
+		GodotLib.calldeferred(purchaseCallbackId, "purchase_fail", new Object[] {});
 	}
 
 	public void callbackCancel() {
-		GodotLib.calldeferred(purchaseCallbackId, "purchase_cancel", new Object[]{});
+		GodotLib.calldeferred(purchaseCallbackId, "purchase_cancel", new Object[] {});
 	}
 
 	public void callbackAlreadyOwned(String sku) {
-		GodotLib.calldeferred(purchaseCallbackId, "purchase_owned", new Object[]{sku});
+		GodotLib.calldeferred(purchaseCallbackId, "purchase_owned", new Object[] { sku });
 	}
 
 	public int getPurchaseCallbackId() {
@@ -161,7 +160,7 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 
 	// callback for requestPurchased()
 	public void callbackPurchased(String receipt, String signature, String sku) {
-		GodotLib.calldeferred(purchaseCallbackId, "has_purchased", new Object[]{receipt, signature, sku});
+		GodotLib.calldeferred(purchaseCallbackId, "has_purchased", new Object[] { receipt, signature, sku });
 	}
 
 	// consume item automatically after purchase. default is true.
@@ -210,10 +209,10 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 	}
 
 	public void completeSkuDetail() {
-		GodotLib.calldeferred(purchaseCallbackId, "sku_details_complete", new Object[]{mSkuDetails});
+		GodotLib.calldeferred(purchaseCallbackId, "sku_details_complete", new Object[] { mSkuDetails });
 	}
 
 	public void errorSkuDetail(String errorMessage) {
-		GodotLib.calldeferred(purchaseCallbackId, "sku_details_error", new Object[]{errorMessage});
+		GodotLib.calldeferred(purchaseCallbackId, "sku_details_error", new Object[] { errorMessage });
 	}
 }
diff --git a/platform/android/java/src/org/godotengine/godot/GodotView.java b/platform/android/java/src/org/godotengine/godot/GodotView.java
index b807b952d4..b762aa021a 100644
--- a/platform/android/java/src/org/godotengine/godot/GodotView.java
+++ b/platform/android/java/src/org/godotengine/godot/GodotView.java
@@ -77,20 +77,19 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 	private static Context ctx;
 
 	private static GodotIO io;
-	private static boolean firsttime=true;
-	private static boolean use_gl3=false;
-	private static boolean use_32=false;
+	private static boolean firsttime = true;
+	private static boolean use_gl3 = false;
+	private static boolean use_32 = false;
 
 	private Godot activity;
 
-
 	private InputManagerCompat mInputManager;
-	public GodotView(Context context,GodotIO p_io,boolean p_use_gl3, boolean p_use_32_bits, Godot p_activity) {
+	public GodotView(Context context, GodotIO p_io, boolean p_use_gl3, boolean p_use_32_bits, Godot p_activity) {
 		super(context);
-		ctx=context;
-		io=p_io;
-		use_gl3=p_use_gl3;
-		use_32=p_use_32_bits;
+		ctx = context;
+		io = p_io;
+		use_gl3 = p_use_gl3;
+		use_32 = p_use_32_bits;
 
 		activity = p_activity;
 
@@ -101,14 +100,15 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 		mInputManager = InputManagerCompat.Factory.getInputManager(this.getContext());
 		mInputManager.registerInputDeviceListener(this, null);
 		init(false, 16, 0);
-    }
+	}
 
-    public GodotView(Context context, boolean translucent, int depth, int stencil) {
+	public GodotView(Context context, boolean translucent, int depth, int stencil) {
 		super(context);
 		init(translucent, depth, stencil);
-    }
+	}
 
-	@Override public boolean onTouchEvent (MotionEvent event) {
+	@Override
+	public boolean onTouchEvent(MotionEvent event) {
 
 		return activity.gotTouchEvent(event);
 	};
@@ -196,16 +196,17 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 	ArrayList<joystick> joy_devices = new ArrayList<joystick>();
 
 	private int find_joy_device(int device_id) {
-		for (int i=0; i<joy_devices.size(); i++) {
+		for (int i = 0; i < joy_devices.size(); i++) {
 			if (joy_devices.get(i).device_id == device_id) {
-					return i;
+				return i;
 			}
 		}
 		onInputDeviceAdded(device_id);
 		return joy_devices.size() - 1;
 	}
 
-	@Override public void onInputDeviceAdded(int deviceId) {
+	@Override
+	public void onInputDeviceAdded(int deviceId) {
 		joystick joy = new joystick();
 		joy.device_id = deviceId;
 		final int id = joy_devices.size();
@@ -219,8 +220,7 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 		for (InputDevice.MotionRange range : ranges) {
 			if (range.getAxis() == MotionEvent.AXIS_HAT_X || range.getAxis() == MotionEvent.AXIS_HAT_Y) {
 				joy.hats.add(range);
-			}
-			else {
+			} else {
 				joy.axes.add(range);
 			}
 		}
@@ -231,9 +231,10 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 				GodotLib.joyconnectionchanged(id, true, name);
 			}
 		});
-  }
+	}
 
-	@Override public void onInputDeviceRemoved(int deviceId) {
+	@Override
+	public void onInputDeviceRemoved(int deviceId) {
 		final int id = find_joy_device(deviceId);
 		joy_devices.remove(id);
 		queueEvent(new Runnable() {
@@ -244,10 +245,11 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 		});
 	}
 
-	@Override public void onInputDeviceChanged(int deviceId) {
-
+	@Override
+	public void onInputDeviceChanged(int deviceId) {
 	}
-	@Override public boolean onKeyUp(final int keyCode, KeyEvent event) {
+	@Override
+	public boolean onKeyUp(final int keyCode, KeyEvent event) {
 
 		if (keyCode == KeyEvent.KEYCODE_BACK) {
 			return true;
@@ -282,7 +284,8 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 		return super.onKeyUp(keyCode, event);
 	};
 
-	@Override public boolean onKeyDown(final int keyCode, KeyEvent event) {
+	@Override
+	public boolean onKeyDown(final int keyCode, KeyEvent event) {
 
 		if (keyCode == KeyEvent.KEYCODE_BACK) {
 			activity.onBackPressed();
@@ -326,7 +329,8 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 		return super.onKeyDown(keyCode, event);
 	}
 
-	@Override public boolean onGenericMotionEvent(MotionEvent event) {
+	@Override
+	public boolean onGenericMotionEvent(MotionEvent event) {
 
 		if ((event.getSource() & InputDevice.SOURCE_JOYSTICK) == InputDevice.SOURCE_JOYSTICK && event.getAction() == MotionEvent.ACTION_MOVE) {
 
@@ -335,7 +339,7 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 
 			for (int i = 0; i < joy.axes.size(); i++) {
 				InputDevice.MotionRange range = joy.axes.get(i);
-				final float value = (event.getAxisValue(range.getAxis()) - range.getMin() ) / range.getRange() * 2.0f - 1.0f;
+				final float value = (event.getAxisValue(range.getAxis()) - range.getMin()) / range.getRange() * 2.0f - 1.0f;
 				//Log.e(TAG, String.format("axis event: %d, value %f", i, value));
 				final int idx = i;
 				queueEvent(new Runnable() {
@@ -346,9 +350,9 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 				});
 			}
 
-			for (int i = 0; i < joy.hats.size(); i+=2) {
+			for (int i = 0; i < joy.hats.size(); i += 2) {
 				final int hatX = Math.round(event.getAxisValue(joy.hats.get(i).getAxis()));
-				final int hatY = Math.round(event.getAxisValue(joy.hats.get(i+1).getAxis()));
+				final int hatY = Math.round(event.getAxisValue(joy.hats.get(i + 1).getAxis()));
 				//Log.e(TAG, String.format("HAT EVENT %d, %d", hatX, hatY));
 				queueEvent(new Runnable() {
 					@Override
@@ -363,8 +367,7 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 		return super.onGenericMotionEvent(event);
 	};
 
-
-    private void init(boolean translucent, int depth, int stencil) {
+	private void init(boolean translucent, int depth, int stencil) {
 
 		this.setFocusableInTouchMode(true);
 		/* By default, GLSurfaceView() creates a RGB_565 opaque surface.
@@ -388,14 +391,14 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 		 */
 
 		if (use_32) {
-			setEGLConfigChooser( translucent ?
-						new FallbackConfigChooser(8, 8, 8, 8, 24, stencil, new ConfigChooser(8, 8, 8, 8, 16, stencil)) :
-						new FallbackConfigChooser(8, 8, 8, 8, 24, stencil, new ConfigChooser(5, 6, 5, 0, 16, stencil)) );
+			setEGLConfigChooser(translucent ?
+										new FallbackConfigChooser(8, 8, 8, 8, 24, stencil, new ConfigChooser(8, 8, 8, 8, 16, stencil)) :
+										new FallbackConfigChooser(8, 8, 8, 8, 24, stencil, new ConfigChooser(5, 6, 5, 0, 16, stencil)));
 
 		} else {
-			setEGLConfigChooser( translucent ?
-						new ConfigChooser(8, 8, 8, 8, 16, stencil) :
-						new ConfigChooser(5, 6, 5, 0, 16, stencil) );
+			setEGLConfigChooser(translucent ?
+										new ConfigChooser(8, 8, 8, 8, 16, stencil) :
+										new ConfigChooser(5, 6, 5, 0, 16, stencil));
 		}
 
 		/* Set the renderer responsible for frame rendering */
@@ -403,33 +406,33 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 	}
 
 	private static class ContextFactory implements GLSurfaceView.EGLContextFactory {
-	private static int EGL_CONTEXT_CLIENT_VERSION = 0x3098;
-	public EGLContext createContext(EGL10 egl, EGLDisplay display, EGLConfig eglConfig) {
-		if (use_gl3)
-			Log.w(TAG, "creating OpenGL ES 3.0 context :");
-		else
-			Log.w(TAG, "creating OpenGL ES 2.0 context :");
-
-		checkEglError("Before eglCreateContext", egl);
-		int[] attrib_list2 = {EGL_CONTEXT_CLIENT_VERSION, 2, EGL10.EGL_NONE };
-		int[] attrib_list3 = {EGL_CONTEXT_CLIENT_VERSION, 3, EGL10.EGL_NONE };
-		EGLContext context = egl.eglCreateContext(display, eglConfig, EGL10.EGL_NO_CONTEXT, use_gl3?attrib_list3:attrib_list2);
-		checkEglError("After eglCreateContext", egl);
-		return context;
-	}
+		private static int EGL_CONTEXT_CLIENT_VERSION = 0x3098;
+		public EGLContext createContext(EGL10 egl, EGLDisplay display, EGLConfig eglConfig) {
+			if (use_gl3)
+				Log.w(TAG, "creating OpenGL ES 3.0 context :");
+			else
+				Log.w(TAG, "creating OpenGL ES 2.0 context :");
+
+			checkEglError("Before eglCreateContext", egl);
+			int[] attrib_list2 = { EGL_CONTEXT_CLIENT_VERSION, 2, EGL10.EGL_NONE };
+			int[] attrib_list3 = { EGL_CONTEXT_CLIENT_VERSION, 3, EGL10.EGL_NONE };
+			EGLContext context = egl.eglCreateContext(display, eglConfig, EGL10.EGL_NO_CONTEXT, use_gl3 ? attrib_list3 : attrib_list2);
+			checkEglError("After eglCreateContext", egl);
+			return context;
+		}
 
-	public void destroyContext(EGL10 egl, EGLDisplay display, EGLContext context) {
-	    egl.eglDestroyContext(display, context);
+		public void destroyContext(EGL10 egl, EGLDisplay display, EGLContext context) {
+			egl.eglDestroyContext(display, context);
+		}
 	}
-    }
 
-    private static void checkEglError(String prompt, EGL10 egl) {
-	int error;
-	while ((error = egl.eglGetError()) != EGL10.EGL_SUCCESS) {
-	    Log.e(TAG, String.format("%s: EGL error: 0x%x", prompt, error));
+	private static void checkEglError(String prompt, EGL10 egl) {
+		int error;
+		while ((error = egl.eglGetError()) != EGL10.EGL_SUCCESS) {
+			Log.e(TAG, String.format("%s: EGL error: 0x%x", prompt, error));
+		}
 	}
-    }
-    	/* Fallback if 32bit View is not supported*/
+	/* Fallback if 32bit View is not supported*/
 	private static class FallbackConfigChooser extends ConfigChooser {
 		private ConfigChooser fallback;
 
@@ -438,17 +441,17 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 			this.fallback = fallback;
 		}
 
-      		@Override
+		@Override
 		public EGLConfig chooseConfig(EGL10 egl, EGLDisplay display, EGLConfig[] configs) {
 			EGLConfig ec = super.chooseConfig(egl, display, configs);
 			if (ec == null) {
-	  			Log.w(TAG, "Trying ConfigChooser fallback");
-	  			ec = fallback.chooseConfig(egl, display, configs);
-				use_32=false;
+				Log.w(TAG, "Trying ConfigChooser fallback");
+				ec = fallback.chooseConfig(egl, display, configs);
+				use_32 = false;
 			}
 			return ec;
-      		}
-    	}
+		}
+	}
 
 	private static class ConfigChooser implements GLSurfaceView.EGLConfigChooser {
 
@@ -467,46 +470,46 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 		 */
 		private static int EGL_OPENGL_ES2_BIT = 4;
 		private static int[] s_configAttribs2 =
-		{
-			EGL10.EGL_RED_SIZE, 4,
-			EGL10.EGL_GREEN_SIZE, 4,
-			EGL10.EGL_BLUE_SIZE, 4,
-		  //  EGL10.EGL_DEPTH_SIZE,     16,
-		   // EGL10.EGL_STENCIL_SIZE,   EGL10.EGL_DONT_CARE,
-			EGL10.EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
-			EGL10.EGL_NONE
-		};
+				{
+					EGL10.EGL_RED_SIZE, 4,
+					EGL10.EGL_GREEN_SIZE, 4,
+					EGL10.EGL_BLUE_SIZE, 4,
+					//  EGL10.EGL_DEPTH_SIZE,     16,
+					// EGL10.EGL_STENCIL_SIZE,   EGL10.EGL_DONT_CARE,
+					EGL10.EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
+					EGL10.EGL_NONE
+				};
 		private static int[] s_configAttribs3 =
-		{
-			EGL10.EGL_RED_SIZE, 4,
-			EGL10.EGL_GREEN_SIZE, 4,
-			EGL10.EGL_BLUE_SIZE, 4,
-		   // EGL10.EGL_DEPTH_SIZE,     16,
-		  //  EGL10.EGL_STENCIL_SIZE,   EGL10.EGL_DONT_CARE,
-			EGL10.EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, //apparently there is no EGL_OPENGL_ES3_BIT
-			EGL10.EGL_NONE
-		};
+				{
+					EGL10.EGL_RED_SIZE, 4,
+					EGL10.EGL_GREEN_SIZE, 4,
+					EGL10.EGL_BLUE_SIZE, 4,
+					// EGL10.EGL_DEPTH_SIZE,     16,
+					//  EGL10.EGL_STENCIL_SIZE,   EGL10.EGL_DONT_CARE,
+					EGL10.EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, //apparently there is no EGL_OPENGL_ES3_BIT
+					EGL10.EGL_NONE
+				};
 
 		public EGLConfig chooseConfig(EGL10 egl, EGLDisplay display) {
 
 			/* Get the number of minimally matching EGL configurations
 			 */
 			int[] num_config = new int[1];
-			egl.eglChooseConfig(display, use_gl3?s_configAttribs3:s_configAttribs2, null, 0, num_config);
+			egl.eglChooseConfig(display, use_gl3 ? s_configAttribs3 : s_configAttribs2, null, 0, num_config);
 
 			int numConfigs = num_config[0];
 
 			if (numConfigs <= 0) {
-			throw new IllegalArgumentException("No configs match configSpec");
+				throw new IllegalArgumentException("No configs match configSpec");
 			}
 
 			/* Allocate then read the array of minimally matching EGL configs
 			 */
 			EGLConfig[] configs = new EGLConfig[numConfigs];
-			egl.eglChooseConfig(display, use_gl3?s_configAttribs3:s_configAttribs2, configs, numConfigs, num_config);
+			egl.eglChooseConfig(display, use_gl3 ? s_configAttribs3 : s_configAttribs2, configs, numConfigs, num_config);
 
 			if (DEBUG) {
-			 printConfigs(egl, display, configs);
+				printConfigs(egl, display, configs);
 			}
 			/* Now return the "best" one
 			 */
@@ -514,54 +517,54 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 		}
 
 		public EGLConfig chooseConfig(EGL10 egl, EGLDisplay display,
-			EGLConfig[] configs) {
-			for(EGLConfig config : configs) {
-			int d = findConfigAttrib(egl, display, config,
-				EGL10.EGL_DEPTH_SIZE, 0);
-			int s = findConfigAttrib(egl, display, config,
-				EGL10.EGL_STENCIL_SIZE, 0);
-
-			// We need at least mDepthSize and mStencilSize bits
-			if (d < mDepthSize || s < mStencilSize)
-				continue;
-
-			// We want an *exact* match for red/green/blue/alpha
-			int r = findConfigAttrib(egl, display, config,
-				EGL10.EGL_RED_SIZE, 0);
-			int g = findConfigAttrib(egl, display, config,
-					EGL10.EGL_GREEN_SIZE, 0);
-			int b = findConfigAttrib(egl, display, config,
-					EGL10.EGL_BLUE_SIZE, 0);
-			int a = findConfigAttrib(egl, display, config,
-				EGL10.EGL_ALPHA_SIZE, 0);
-
-			if (r == mRedSize && g == mGreenSize && b == mBlueSize && a == mAlphaSize)
-				return config;
+				EGLConfig[] configs) {
+			for (EGLConfig config : configs) {
+				int d = findConfigAttrib(egl, display, config,
+						EGL10.EGL_DEPTH_SIZE, 0);
+				int s = findConfigAttrib(egl, display, config,
+						EGL10.EGL_STENCIL_SIZE, 0);
+
+				// We need at least mDepthSize and mStencilSize bits
+				if (d < mDepthSize || s < mStencilSize)
+					continue;
+
+				// We want an *exact* match for red/green/blue/alpha
+				int r = findConfigAttrib(egl, display, config,
+						EGL10.EGL_RED_SIZE, 0);
+				int g = findConfigAttrib(egl, display, config,
+						EGL10.EGL_GREEN_SIZE, 0);
+				int b = findConfigAttrib(egl, display, config,
+						EGL10.EGL_BLUE_SIZE, 0);
+				int a = findConfigAttrib(egl, display, config,
+						EGL10.EGL_ALPHA_SIZE, 0);
+
+				if (r == mRedSize && g == mGreenSize && b == mBlueSize && a == mAlphaSize)
+					return config;
 			}
 			return null;
 		}
 
 		private int findConfigAttrib(EGL10 egl, EGLDisplay display,
-			EGLConfig config, int attribute, int defaultValue) {
+				EGLConfig config, int attribute, int defaultValue) {
 
 			if (egl.eglGetConfigAttrib(display, config, attribute, mValue)) {
-			return mValue[0];
+				return mValue[0];
 			}
 			return defaultValue;
 		}
 
 		private void printConfigs(EGL10 egl, EGLDisplay display,
-			EGLConfig[] configs) {
+				EGLConfig[] configs) {
 			int numConfigs = configs.length;
 			Log.w(TAG, String.format("%d configurations", numConfigs));
 			for (int i = 0; i < numConfigs; i++) {
-			Log.w(TAG, String.format("Configuration %d:\n", i));
-			printConfig(egl, display, configs[i]);
+				Log.w(TAG, String.format("Configuration %d:\n", i));
+				printConfig(egl, display, configs[i]);
 			}
 		}
 
 		private void printConfig(EGL10 egl, EGLDisplay display,
-			EGLConfig config) {
+				EGLConfig config) {
 			int[] attributes = {
 				EGL10.EGL_BUFFER_SIZE,
 				EGL10.EGL_ALPHA_SIZE,
@@ -634,14 +637,15 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 			};
 			int[] value = new int[1];
 			for (int i = 0; i < attributes.length; i++) {
-			int attribute = attributes[i];
-			String name = names[i];
-			if ( egl.eglGetConfigAttrib(display, config, attribute, value)) {
-				Log.w(TAG, String.format("  %s: %d\n", name, value[0]));
-			} else {
-				// Log.w(TAG, String.format("  %s: failed\n", name));
-				while (egl.eglGetError() != EGL10.EGL_SUCCESS);
-			}
+				int attribute = attributes[i];
+				String name = names[i];
+				if (egl.eglGetConfigAttrib(display, config, attribute, value)) {
+					Log.w(TAG, String.format("  %s: %d\n", name, value[0]));
+				} else {
+					// Log.w(TAG, String.format("  %s: failed\n", name));
+					while (egl.eglGetError() != EGL10.EGL_SUCCESS)
+						;
+				}
 			}
 		}
 
@@ -657,19 +661,18 @@ public class GodotView extends GLSurfaceView implements InputDeviceListener {
 
 	private static class Renderer implements GLSurfaceView.Renderer {
 
-
 		public void onDrawFrame(GL10 gl) {
 			GodotLib.step();
-			for(int i=0;i<Godot.singleton_count;i++) {
+			for (int i = 0; i < Godot.singleton_count; i++) {
 				Godot.singletons[i].onGLDrawFrame(gl);
 			}
 		}
 
 		public void onSurfaceChanged(GL10 gl, int width, int height) {
 
-			GodotLib.resize(width, height,!firsttime);
-			firsttime=false;
-			for(int i=0;i<Godot.singleton_count;i++) {
+			GodotLib.resize(width, height, !firsttime);
+			firsttime = false;
+			for (int i = 0; i < Godot.singleton_count; i++) {
 				Godot.singletons[i].onGLSurfaceChanged(gl, width, height);
 			}
 		}
diff --git a/platform/android/java/src/org/godotengine/godot/input/GodotEditText.java b/platform/android/java/src/org/godotengine/godot/input/GodotEditText.java
index d8a3ac5591..9e062d89c6 100644
--- a/platform/android/java/src/org/godotengine/godot/input/GodotEditText.java
+++ b/platform/android/java/src/org/godotengine/godot/input/GodotEditText.java
@@ -70,42 +70,37 @@ public class GodotEditText extends EditText {
 		super(context, attrs, defStyle);
 		this.initView();
 	}
-	
+
 	protected void initView() {
-		this.setPadding(0,  0, 0, 0);
+		this.setPadding(0, 0, 0, 0);
 		this.setImeOptions(EditorInfo.IME_FLAG_NO_EXTRACT_UI);
-		
+
 		sHandler = new Handler() {
 			@Override
 			public void handleMessage(final Message msg) {
 				switch (msg.what) {
-					case HANDLER_OPEN_IME_KEYBOARD:
-						{
-							GodotEditText edit = (GodotEditText) msg.obj;
-							String text = edit.mOriginText;
-							if (edit.requestFocus())
-							{
-								edit.removeTextChangedListener(edit.mInputWrapper);
-								edit.setText("");
-								edit.append(text);
-								edit.mInputWrapper.setOriginText(text);
-								edit.addTextChangedListener(edit.mInputWrapper);
-								final InputMethodManager imm = (InputMethodManager) mView.getContext().getSystemService(Context.INPUT_METHOD_SERVICE);
-								imm.showSoftInput(edit, 0);
-							}
+					case HANDLER_OPEN_IME_KEYBOARD: {
+						GodotEditText edit = (GodotEditText)msg.obj;
+						String text = edit.mOriginText;
+						if (edit.requestFocus()) {
+							edit.removeTextChangedListener(edit.mInputWrapper);
+							edit.setText("");
+							edit.append(text);
+							edit.mInputWrapper.setOriginText(text);
+							edit.addTextChangedListener(edit.mInputWrapper);
+							final InputMethodManager imm = (InputMethodManager)mView.getContext().getSystemService(Context.INPUT_METHOD_SERVICE);
+							imm.showSoftInput(edit, 0);
 						}
-						break;
-
-					case HANDLER_CLOSE_IME_KEYBOARD:
-						{
-							GodotEditText edit = (GodotEditText) msg.obj;
-							
-							edit.removeTextChangedListener(mInputWrapper);
-							final InputMethodManager imm = (InputMethodManager) mView.getContext().getSystemService(Context.INPUT_METHOD_SERVICE);
-							imm.hideSoftInputFromWindow(edit.getWindowToken(), 0);
-							edit.mView.requestFocus();
-						}
-						break;
+					} break;
+
+					case HANDLER_CLOSE_IME_KEYBOARD: {
+						GodotEditText edit = (GodotEditText)msg.obj;
+
+						edit.removeTextChangedListener(mInputWrapper);
+						final InputMethodManager imm = (InputMethodManager)mView.getContext().getSystemService(Context.INPUT_METHOD_SERVICE);
+						imm.hideSoftInputFromWindow(edit.getWindowToken(), 0);
+						edit.mView.requestFocus();
+					} break;
 				}
 			}
 		};
@@ -116,7 +111,7 @@ public class GodotEditText extends EditText {
 	// ===========================================================
 	public void setView(final GodotView view) {
 		this.mView = view;
-		if(mInputWrapper == null)
+		if (mInputWrapper == null)
 			mInputWrapper = new GodotTextInputWrapper(mView, this);
 		this.setOnEditorActionListener(mInputWrapper);
 		view.requestFocus();
@@ -125,7 +120,7 @@ public class GodotEditText extends EditText {
 	// ===========================================================
 	// Methods for/from SuperClass/Interfaces
 	// ===========================================================
-    @Override
+	@Override
 	public boolean onKeyDown(final int keyCode, final KeyEvent keyEvent) {
 		super.onKeyDown(keyCode, keyEvent);
 
@@ -142,7 +137,7 @@ public class GodotEditText extends EditText {
 	// ===========================================================
 	public void showKeyboard(String p_existing_text) {
 		this.mOriginText = p_existing_text;
-		
+
 		final Message msg = new Message();
 		msg.what = HANDLER_OPEN_IME_KEYBOARD;
 		msg.obj = this;
@@ -155,7 +150,7 @@ public class GodotEditText extends EditText {
 		msg.obj = this;
 		sHandler.sendMessage(msg);
 	}
-	
+
 	// ===========================================================
 	// Inner and Anonymous Classes
 	// ===========================================================
diff --git a/platform/android/java/src/org/godotengine/godot/input/GodotTextInputWrapper.java b/platform/android/java/src/org/godotengine/godot/input/GodotTextInputWrapper.java
index ac424ab9f8..8e34d9e9e7 100644
--- a/platform/android/java/src/org/godotengine/godot/input/GodotTextInputWrapper.java
+++ b/platform/android/java/src/org/godotengine/godot/input/GodotTextInputWrapper.java
@@ -67,7 +67,7 @@ public class GodotTextInputWrapper implements TextWatcher, OnEditorActionListene
 
 	private boolean isFullScreenEdit() {
 		final TextView textField = this.mEdit;
-		final InputMethodManager imm = (InputMethodManager) textField.getContext().getSystemService(Context.INPUT_METHOD_SERVICE);
+		final InputMethodManager imm = (InputMethodManager)textField.getContext().getSystemService(Context.INPUT_METHOD_SERVICE);
 		return imm.isFullscreenMode();
 	}
 
@@ -81,7 +81,6 @@ public class GodotTextInputWrapper implements TextWatcher, OnEditorActionListene
 
 	@Override
 	public void afterTextChanged(final Editable s) {
-
 	}
 
 	@Override
diff --git a/platform/android/java/src/org/godotengine/godot/input/InputManagerCompat.java b/platform/android/java/src/org/godotengine/godot/input/InputManagerCompat.java
index 4615d2fbb5..0a876d2b7f 100644
--- a/platform/android/java/src/org/godotengine/godot/input/InputManagerCompat.java
+++ b/platform/android/java/src/org/godotengine/godot/input/InputManagerCompat.java
@@ -23,118 +23,118 @@ import android.view.InputDevice;
 import android.view.MotionEvent;
 
 public interface InputManagerCompat {
-    /**
-     * Gets information about the input device with the specified id.
-     *
-     * @param id The device id
-     * @return The input device or null if not found
-     */
-    public InputDevice getInputDevice(int id);
+	/**
+	 * Gets information about the input device with the specified id.
+	 *
+	 * @param id The device id
+	 * @return The input device or null if not found
+	 */
+	public InputDevice getInputDevice(int id);
 
-    /**
-     * Gets the ids of all input devices in the system.
-     *
-     * @return The input device ids.
-     */
-    public int[] getInputDeviceIds();
+	/**
+	 * Gets the ids of all input devices in the system.
+	 *
+	 * @return The input device ids.
+	 */
+	public int[] getInputDeviceIds();
 
-    /**
-     * Registers an input device listener to receive notifications about when
-     * input devices are added, removed or changed.
-     *
-     * @param listener The listener to register.
-     * @param handler The handler on which the listener should be invoked, or
-     *            null if the listener should be invoked on the calling thread's
-     *            looper.
-     */
-    public void registerInputDeviceListener(InputManagerCompat.InputDeviceListener listener,
-            Handler handler);
+	/**
+	 * Registers an input device listener to receive notifications about when
+	 * input devices are added, removed or changed.
+	 *
+	 * @param listener The listener to register.
+	 * @param handler The handler on which the listener should be invoked, or
+	 *            null if the listener should be invoked on the calling thread's
+	 *            looper.
+	 */
+	public void registerInputDeviceListener(InputManagerCompat.InputDeviceListener listener,
+			Handler handler);
 
-    /**
-     * Unregisters an input device listener.
-     *
-     * @param listener The listener to unregister.
-     */
-    public void unregisterInputDeviceListener(InputManagerCompat.InputDeviceListener listener);
+	/**
+	 * Unregisters an input device listener.
+	 *
+	 * @param listener The listener to unregister.
+	 */
+	public void unregisterInputDeviceListener(InputManagerCompat.InputDeviceListener listener);
 
-    /*
-     * The following three calls are to simulate V16 behavior on pre-Jellybean
-     * devices. If you don't call them, your callback will never be called
-     * pre-API 16.
-     */
+	/*
+	 * The following three calls are to simulate V16 behavior on pre-Jellybean
+	 * devices. If you don't call them, your callback will never be called
+	 * pre-API 16.
+	 */
 
-    /**
-     * Pass the motion events to the InputManagerCompat. This is used to
-     * optimize for polling for controllers. If you do not pass these events in,
-     * polling will cause regular object creation.
-     *
-     * @param event the motion event from the app
-     */
-    public void onGenericMotionEvent(MotionEvent event);
+	/**
+	 * Pass the motion events to the InputManagerCompat. This is used to
+	 * optimize for polling for controllers. If you do not pass these events in,
+	 * polling will cause regular object creation.
+	 *
+	 * @param event the motion event from the app
+	 */
+	public void onGenericMotionEvent(MotionEvent event);
 
-    /**
-     * Tell the V9 input manager that it should stop polling for disconnected
-     * devices. You can call this during onPause in your activity, although you
-     * might want to call it whenever your game is not active (or whenever you
-     * don't care about being notified of new input devices)
-     */
-    public void onPause();
+	/**
+	 * Tell the V9 input manager that it should stop polling for disconnected
+	 * devices. You can call this during onPause in your activity, although you
+	 * might want to call it whenever your game is not active (or whenever you
+	 * don't care about being notified of new input devices)
+	 */
+	public void onPause();
 
-    /**
-     * Tell the V9 input manager that it should start polling for disconnected
-     * devices. You can call this during onResume in your activity, although you
-     * might want to call it less often (only when the gameplay is actually
-     * active)
-     */
-    public void onResume();
+	/**
+	 * Tell the V9 input manager that it should start polling for disconnected
+	 * devices. You can call this during onResume in your activity, although you
+	 * might want to call it less often (only when the gameplay is actually
+	 * active)
+	 */
+	public void onResume();
 
-    public interface InputDeviceListener {
-        /**
-         * Called whenever the input manager detects that a device has been
-         * added. This will only be called in the V9 version when a motion event
-         * is detected.
-         *
-         * @param deviceId The id of the input device that was added.
-         */
-        void onInputDeviceAdded(int deviceId);
+	public interface InputDeviceListener {
+		/**
+		 * Called whenever the input manager detects that a device has been
+		 * added. This will only be called in the V9 version when a motion event
+		 * is detected.
+		 *
+		 * @param deviceId The id of the input device that was added.
+		 */
+		void onInputDeviceAdded(int deviceId);
 
-        /**
-         * Called whenever the properties of an input device have changed since
-         * they were last queried. This will not be called for the V9 version of
-         * the API.
-         *
-         * @param deviceId The id of the input device that changed.
-         */
-        void onInputDeviceChanged(int deviceId);
+		/**
+		 * Called whenever the properties of an input device have changed since
+		 * they were last queried. This will not be called for the V9 version of
+		 * the API.
+		 *
+		 * @param deviceId The id of the input device that changed.
+		 */
+		void onInputDeviceChanged(int deviceId);
 
-        /**
-         * Called whenever the input manager detects that a device has been
-         * removed. For the V9 version, this can take some time depending on the
-         * poll rate.
-         *
-         * @param deviceId The id of the input device that was removed.
-         */
-        void onInputDeviceRemoved(int deviceId);
-    }
+		/**
+		 * Called whenever the input manager detects that a device has been
+		 * removed. For the V9 version, this can take some time depending on the
+		 * poll rate.
+		 *
+		 * @param deviceId The id of the input device that was removed.
+		 */
+		void onInputDeviceRemoved(int deviceId);
+	}
 
-    /**
-     * Use this to construct a compatible InputManager.
-     */
-    public static class Factory {
+	/**
+	 * Use this to construct a compatible InputManager.
+	 */
+	public static class Factory {
 
-        /**
-         * Constructs and returns a compatible InputManger
-         *
-         * @param context the Context that will be used to get the system
-         *            service from
-         * @return a compatible implementation of InputManager
-         */
-        public static InputManagerCompat getInputManager(Context context) {
-            if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN) {
-                return new InputManagerV16(context);
-            } else {
-                return new InputManagerV9();
-            }
-        }
-    }
+		/**
+		 * Constructs and returns a compatible InputManger
+		 *
+		 * @param context the Context that will be used to get the system
+		 *            service from
+		 * @return a compatible implementation of InputManager
+		 */
+		public static InputManagerCompat getInputManager(Context context) {
+			if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN) {
+				return new InputManagerV16(context);
+			} else {
+				return new InputManagerV9();
+			}
+		}
+	}
 }
diff --git a/platform/android/java/src/org/godotengine/godot/input/InputManagerV16.java b/platform/android/java/src/org/godotengine/godot/input/InputManagerV16.java
index f05701f455..3b88609cc9 100644
--- a/platform/android/java/src/org/godotengine/godot/input/InputManagerV16.java
+++ b/platform/android/java/src/org/godotengine/godot/input/InputManagerV16.java
@@ -30,78 +30,74 @@ import java.util.Map;
 @TargetApi(Build.VERSION_CODES.JELLY_BEAN)
 public class InputManagerV16 implements InputManagerCompat {
 
-    private final InputManager mInputManager;
-    private final Map<InputManagerCompat.InputDeviceListener, V16InputDeviceListener> mListeners;
-
-    public InputManagerV16(Context context) {
-        mInputManager = (InputManager) context.getSystemService(Context.INPUT_SERVICE);
-        mListeners = new HashMap<InputManagerCompat.InputDeviceListener, V16InputDeviceListener>();
-    }
-
-    @Override
-    public InputDevice getInputDevice(int id) {
-        return mInputManager.getInputDevice(id);
-    }
-
-    @Override
-    public int[] getInputDeviceIds() {
-        return mInputManager.getInputDeviceIds();
-    }
-
-    static class V16InputDeviceListener implements InputManager.InputDeviceListener {
-        final InputManagerCompat.InputDeviceListener mIDL;
-
-        public V16InputDeviceListener(InputDeviceListener idl) {
-            mIDL = idl;
-        }
-
-        @Override
-        public void onInputDeviceAdded(int deviceId) {
-            mIDL.onInputDeviceAdded(deviceId);
-        }
-
-        @Override
-        public void onInputDeviceChanged(int deviceId) {
-            mIDL.onInputDeviceChanged(deviceId);
-        }
-
-        @Override
-        public void onInputDeviceRemoved(int deviceId) {
-            mIDL.onInputDeviceRemoved(deviceId);
-        }
-
-    }
-
-    @Override
-    public void registerInputDeviceListener(InputDeviceListener listener, Handler handler) {
-        V16InputDeviceListener v16Listener = new V16InputDeviceListener(listener);
-        mInputManager.registerInputDeviceListener(v16Listener, handler);
-        mListeners.put(listener, v16Listener);
-    }
-
-    @Override
-    public void unregisterInputDeviceListener(InputDeviceListener listener) {
-        V16InputDeviceListener curListener = mListeners.remove(listener);
-        if (null != curListener)
-        {
-            mInputManager.unregisterInputDeviceListener(curListener);
-        }
-
-    }
-
-    @Override
-    public void onGenericMotionEvent(MotionEvent event) {
-        // unused in V16
-    }
-
-    @Override
-    public void onPause() {
-        // unused in V16
-    }
-
-    @Override
-    public void onResume() {
-        // unused in V16
-    }
-
+	private final InputManager mInputManager;
+	private final Map<InputManagerCompat.InputDeviceListener, V16InputDeviceListener> mListeners;
+
+	public InputManagerV16(Context context) {
+		mInputManager = (InputManager)context.getSystemService(Context.INPUT_SERVICE);
+		mListeners = new HashMap<InputManagerCompat.InputDeviceListener, V16InputDeviceListener>();
+	}
+
+	@Override
+	public InputDevice getInputDevice(int id) {
+		return mInputManager.getInputDevice(id);
+	}
+
+	@Override
+	public int[] getInputDeviceIds() {
+		return mInputManager.getInputDeviceIds();
+	}
+
+	static class V16InputDeviceListener implements InputManager.InputDeviceListener {
+		final InputManagerCompat.InputDeviceListener mIDL;
+
+		public V16InputDeviceListener(InputDeviceListener idl) {
+			mIDL = idl;
+		}
+
+		@Override
+		public void onInputDeviceAdded(int deviceId) {
+			mIDL.onInputDeviceAdded(deviceId);
+		}
+
+		@Override
+		public void onInputDeviceChanged(int deviceId) {
+			mIDL.onInputDeviceChanged(deviceId);
+		}
+
+		@Override
+		public void onInputDeviceRemoved(int deviceId) {
+			mIDL.onInputDeviceRemoved(deviceId);
+		}
+	}
+
+	@Override
+	public void registerInputDeviceListener(InputDeviceListener listener, Handler handler) {
+		V16InputDeviceListener v16Listener = new V16InputDeviceListener(listener);
+		mInputManager.registerInputDeviceListener(v16Listener, handler);
+		mListeners.put(listener, v16Listener);
+	}
+
+	@Override
+	public void unregisterInputDeviceListener(InputDeviceListener listener) {
+		V16InputDeviceListener curListener = mListeners.remove(listener);
+		if (null != curListener) {
+			mInputManager.unregisterInputDeviceListener(curListener);
+		}
+	}
+
+	@Override
+	public void onGenericMotionEvent(MotionEvent event) {
+		// unused in V16
+	}
+
+	@Override
+	public void onPause() {
+		// unused in V16
+	}
+
+	@Override
+	public void onResume() {
+		// unused in V16
+	}
 }
diff --git a/platform/android/java/src/org/godotengine/godot/input/InputManagerV9.java b/platform/android/java/src/org/godotengine/godot/input/InputManagerV9.java
index 0334c00997..a1418c5899 100644
--- a/platform/android/java/src/org/godotengine/godot/input/InputManagerV9.java
+++ b/platform/android/java/src/org/godotengine/godot/input/InputManagerV9.java
@@ -31,181 +31,179 @@ import java.util.Map;
 import java.util.Queue;
 
 public class InputManagerV9 implements InputManagerCompat {
-    private static final String LOG_TAG = "InputManagerV9";
-    private static final int MESSAGE_TEST_FOR_DISCONNECT = 101;
-    private static final long CHECK_ELAPSED_TIME = 3000L;
-
-    private static final int ON_DEVICE_ADDED = 0;
-    private static final int ON_DEVICE_CHANGED = 1;
-    private static final int ON_DEVICE_REMOVED = 2;
-
-    private final SparseArray<long[]> mDevices;
-    private final Map<InputDeviceListener, Handler> mListeners;
-    private final Handler mDefaultHandler;
-
-    private static class PollingMessageHandler extends Handler {
-        private final WeakReference<InputManagerV9> mInputManager;
-
-        PollingMessageHandler(InputManagerV9 im) {
-            mInputManager = new WeakReference<InputManagerV9>(im);
-        }
-
-        @Override
-        public void handleMessage(Message msg) {
-            super.handleMessage(msg);
-            switch (msg.what) {
-                case MESSAGE_TEST_FOR_DISCONNECT:
-                    InputManagerV9 imv = mInputManager.get();
-                    if (null != imv) {
-                        long time = SystemClock.elapsedRealtime();
-                        int size = imv.mDevices.size();
-                        for (int i = 0; i < size; i++) {
-                            long[] lastContact = imv.mDevices.valueAt(i);
-                            if (null != lastContact) {
-                                if (time - lastContact[0] > CHECK_ELAPSED_TIME) {
-                                    // check to see if the device has been
-                                    // disconnected
-                                    int id = imv.mDevices.keyAt(i);
-                                    if (null == InputDevice.getDevice(id)) {
-                                        // disconnected!
-                                        imv.notifyListeners(ON_DEVICE_REMOVED, id);
-                                        imv.mDevices.remove(id);
-                                    } else {
-                                        lastContact[0] = time;
-                                    }
-                                }
-                            }
-                        }
-                        sendEmptyMessageDelayed(MESSAGE_TEST_FOR_DISCONNECT,
-                                CHECK_ELAPSED_TIME);
-                    }
-                    break;
-            }
-        }
-
-    }
-
-    public InputManagerV9() {
-        mDevices = new SparseArray<long[]>();
-        mListeners = new HashMap<InputDeviceListener, Handler>();
-        mDefaultHandler = new PollingMessageHandler(this);
-        // as a side-effect, populates our collection of watched
-        // input devices
-        getInputDeviceIds();
-    }
-
-    @Override
-    public InputDevice getInputDevice(int id) {
-        return InputDevice.getDevice(id);
-    }
-
-    @Override
-    public int[] getInputDeviceIds() {
-        // add any hitherto unknown devices to our
-        // collection of watched input devices
-        int[] activeDevices = InputDevice.getDeviceIds();
-        long time = SystemClock.elapsedRealtime();
-        for ( int id : activeDevices ) {
-            long[] lastContact = mDevices.get(id);
-            if ( null == lastContact ) {
-                // we have a new device
-                mDevices.put(id, new long[] { time });
-            }
-        }
-        return activeDevices;
-    }
-
-    @Override
-    public void registerInputDeviceListener(InputDeviceListener listener, Handler handler) {
-        mListeners.remove(listener);
-        if (handler == null) {
-            handler = mDefaultHandler;
-        }
-        mListeners.put(listener, handler);
-    }
-
-    @Override
-    public void unregisterInputDeviceListener(InputDeviceListener listener) {
-        mListeners.remove(listener);
-    }
-
-    private void notifyListeners(int why, int deviceId) {
-        // the state of some device has changed
-        if (!mListeners.isEmpty()) {
-            // yes... this will cause an object to get created... hopefully
-            // it won't happen very often
-            for (InputDeviceListener listener : mListeners.keySet()) {
-                Handler handler = mListeners.get(listener);
-                DeviceEvent odc = DeviceEvent.getDeviceEvent(why, deviceId, listener);
-                handler.post(odc);
-            }
-        }
-    }
-
-    private static class DeviceEvent implements Runnable {
-        private int mMessageType;
-        private int mId;
-        private InputDeviceListener mListener;
-        private static Queue<DeviceEvent> sEventQueue = new ArrayDeque<DeviceEvent>();
-
-        private DeviceEvent() {
-        }
-
-        static DeviceEvent getDeviceEvent(int messageType, int id,
-                InputDeviceListener listener) {
-            DeviceEvent curChanged = sEventQueue.poll();
-            if (null == curChanged) {
-                curChanged = new DeviceEvent();
-            }
-            curChanged.mMessageType = messageType;
-            curChanged.mId = id;
-            curChanged.mListener = listener;
-            return curChanged;
-        }
-
-        @Override
-        public void run() {
-            switch (mMessageType) {
-                case ON_DEVICE_ADDED:
-                    mListener.onInputDeviceAdded(mId);
-                    break;
-                case ON_DEVICE_CHANGED:
-                    mListener.onInputDeviceChanged(mId);
-                    break;
-                case ON_DEVICE_REMOVED:
-                    mListener.onInputDeviceRemoved(mId);
-                    break;
-                default:
-                    Log.e(LOG_TAG, "Unknown Message Type");
-                    break;
-            }
-            // dump this runnable back in the queue
-            sEventQueue.offer(this);
-        }
-    }
-
-    @Override
-    public void onGenericMotionEvent(MotionEvent event) {
-        // detect new devices
-        int id = event.getDeviceId();
-        long[] timeArray = mDevices.get(id);
-        if (null == timeArray) {
-            notifyListeners(ON_DEVICE_ADDED, id);
-            timeArray = new long[1];
-            mDevices.put(id, timeArray);
-        }
-        long time = SystemClock.elapsedRealtime();
-        timeArray[0] = time;
-    }
-
-    @Override
-    public void onPause() {
-        mDefaultHandler.removeMessages(MESSAGE_TEST_FOR_DISCONNECT);
-    }
-
-    @Override
-    public void onResume() {
-        mDefaultHandler.sendEmptyMessage(MESSAGE_TEST_FOR_DISCONNECT);
-    }
-
+	private static final String LOG_TAG = "InputManagerV9";
+	private static final int MESSAGE_TEST_FOR_DISCONNECT = 101;
+	private static final long CHECK_ELAPSED_TIME = 3000L;
+
+	private static final int ON_DEVICE_ADDED = 0;
+	private static final int ON_DEVICE_CHANGED = 1;
+	private static final int ON_DEVICE_REMOVED = 2;
+
+	private final SparseArray<long[]> mDevices;
+	private final Map<InputDeviceListener, Handler> mListeners;
+	private final Handler mDefaultHandler;
+
+	private static class PollingMessageHandler extends Handler {
+		private final WeakReference<InputManagerV9> mInputManager;
+
+		PollingMessageHandler(InputManagerV9 im) {
+			mInputManager = new WeakReference<InputManagerV9>(im);
+		}
+
+		@Override
+		public void handleMessage(Message msg) {
+			super.handleMessage(msg);
+			switch (msg.what) {
+				case MESSAGE_TEST_FOR_DISCONNECT:
+					InputManagerV9 imv = mInputManager.get();
+					if (null != imv) {
+						long time = SystemClock.elapsedRealtime();
+						int size = imv.mDevices.size();
+						for (int i = 0; i < size; i++) {
+							long[] lastContact = imv.mDevices.valueAt(i);
+							if (null != lastContact) {
+								if (time - lastContact[0] > CHECK_ELAPSED_TIME) {
+									// check to see if the device has been
+									// disconnected
+									int id = imv.mDevices.keyAt(i);
+									if (null == InputDevice.getDevice(id)) {
+										// disconnected!
+										imv.notifyListeners(ON_DEVICE_REMOVED, id);
+										imv.mDevices.remove(id);
+									} else {
+										lastContact[0] = time;
+									}
+								}
+							}
+						}
+						sendEmptyMessageDelayed(MESSAGE_TEST_FOR_DISCONNECT,
+								CHECK_ELAPSED_TIME);
+					}
+					break;
+			}
+		}
+	}
+
+	public InputManagerV9() {
+		mDevices = new SparseArray<long[]>();
+		mListeners = new HashMap<InputDeviceListener, Handler>();
+		mDefaultHandler = new PollingMessageHandler(this);
+		// as a side-effect, populates our collection of watched
+		// input devices
+		getInputDeviceIds();
+	}
+
+	@Override
+	public InputDevice getInputDevice(int id) {
+		return InputDevice.getDevice(id);
+	}
+
+	@Override
+	public int[] getInputDeviceIds() {
+		// add any hitherto unknown devices to our
+		// collection of watched input devices
+		int[] activeDevices = InputDevice.getDeviceIds();
+		long time = SystemClock.elapsedRealtime();
+		for (int id : activeDevices) {
+			long[] lastContact = mDevices.get(id);
+			if (null == lastContact) {
+				// we have a new device
+				mDevices.put(id, new long[] { time });
+			}
+		}
+		return activeDevices;
+	}
+
+	@Override
+	public void registerInputDeviceListener(InputDeviceListener listener, Handler handler) {
+		mListeners.remove(listener);
+		if (handler == null) {
+			handler = mDefaultHandler;
+		}
+		mListeners.put(listener, handler);
+	}
+
+	@Override
+	public void unregisterInputDeviceListener(InputDeviceListener listener) {
+		mListeners.remove(listener);
+	}
+
+	private void notifyListeners(int why, int deviceId) {
+		// the state of some device has changed
+		if (!mListeners.isEmpty()) {
+			// yes... this will cause an object to get created... hopefully
+			// it won't happen very often
+			for (InputDeviceListener listener : mListeners.keySet()) {
+				Handler handler = mListeners.get(listener);
+				DeviceEvent odc = DeviceEvent.getDeviceEvent(why, deviceId, listener);
+				handler.post(odc);
+			}
+		}
+	}
+
+	private static class DeviceEvent implements Runnable {
+		private int mMessageType;
+		private int mId;
+		private InputDeviceListener mListener;
+		private static Queue<DeviceEvent> sEventQueue = new ArrayDeque<DeviceEvent>();
+
+		private DeviceEvent() {
+		}
+
+		static DeviceEvent getDeviceEvent(int messageType, int id,
+				InputDeviceListener listener) {
+			DeviceEvent curChanged = sEventQueue.poll();
+			if (null == curChanged) {
+				curChanged = new DeviceEvent();
+			}
+			curChanged.mMessageType = messageType;
+			curChanged.mId = id;
+			curChanged.mListener = listener;
+			return curChanged;
+		}
+
+		@Override
+		public void run() {
+			switch (mMessageType) {
+				case ON_DEVICE_ADDED:
+					mListener.onInputDeviceAdded(mId);
+					break;
+				case ON_DEVICE_CHANGED:
+					mListener.onInputDeviceChanged(mId);
+					break;
+				case ON_DEVICE_REMOVED:
+					mListener.onInputDeviceRemoved(mId);
+					break;
+				default:
+					Log.e(LOG_TAG, "Unknown Message Type");
+					break;
+			}
+			// dump this runnable back in the queue
+			sEventQueue.offer(this);
+		}
+	}
+
+	@Override
+	public void onGenericMotionEvent(MotionEvent event) {
+		// detect new devices
+		int id = event.getDeviceId();
+		long[] timeArray = mDevices.get(id);
+		if (null == timeArray) {
+			notifyListeners(ON_DEVICE_ADDED, id);
+			timeArray = new long[1];
+			mDevices.put(id, timeArray);
+		}
+		long time = SystemClock.elapsedRealtime();
+		timeArray[0] = time;
+	}
+
+	@Override
+	public void onPause() {
+		mDefaultHandler.removeMessages(MESSAGE_TEST_FOR_DISCONNECT);
+	}
+
+	@Override
+	public void onResume() {
+		mDefaultHandler.sendEmptyMessage(MESSAGE_TEST_FOR_DISCONNECT);
+	}
 }
diff --git a/platform/android/java/src/org/godotengine/godot/payments/ConsumeTask.java b/platform/android/java/src/org/godotengine/godot/payments/ConsumeTask.java
index 8622f4ccff..d6f26e19b2 100644
--- a/platform/android/java/src/org/godotengine/godot/payments/ConsumeTask.java
+++ b/platform/android/java/src/org/godotengine/godot/payments/ConsumeTask.java
@@ -39,33 +39,32 @@ import android.util.Log;
 abstract public class ConsumeTask {
 
 	private Context context;
-	
+
 	private IInAppBillingService mService;
-	public ConsumeTask(IInAppBillingService mService, Context context ){
+	public ConsumeTask(IInAppBillingService mService, Context context) {
 		this.context = context;
 		this.mService = mService;
 	}
-	
 
-	public void consume(final String sku){
+	public void consume(final String sku) {
 		//Log.d("XXX", "Consuming product " + sku);
 		PaymentsCache pc = new PaymentsCache(context);
 		Boolean isBlocked = pc.getConsumableFlag("block", sku);
 		String _token = pc.getConsumableValue("token", sku);
-		//Log.d("XXX", "token " + _token);		
-		if(!isBlocked && _token == null){
+		//Log.d("XXX", "token " + _token);
+		if (!isBlocked && _token == null) {
 			//_token = "inapp:"+context.getPackageName()+":android.test.purchased";
 			//Log.d("XXX", "Consuming product " + sku + " with token " + _token);
-		}else if(!isBlocked){
+		} else if (!isBlocked) {
 			//Log.d("XXX", "It is not blocked ¿?");
 			return;
-		}else if(_token == null){
+		} else if (_token == null) {
 			//Log.d("XXX", "No token available");
 			this.error("No token for sku:" + sku);
 			return;
 		}
 		final String token = _token;
-		new AsyncTask<String, String, String>(){
+		new AsyncTask<String, String, String>() {
 
 			@Override
 			protected String doInBackground(String... params) {
@@ -73,28 +72,27 @@ abstract public class ConsumeTask {
 					//Log.d("XXX", "Requesting to release item.");
 					int response = mService.consumePurchase(3, context.getPackageName(), token);
 					//Log.d("XXX", "release response code: " + response);
-					if(response == 0 || response == 8){
+					if (response == 0 || response == 8) {
 						return null;
 					}
 				} catch (RemoteException e) {
 					return e.getMessage();
-					
 				}
 				return "Some error";
 			}
-			
-			protected void onPostExecute(String param){
-				if(param == null){
-					success( new PaymentsCache(context).getConsumableValue("ticket", sku) );
-				}else{
+
+			protected void onPostExecute(String param) {
+				if (param == null) {
+					success(new PaymentsCache(context).getConsumableValue("ticket", sku));
+				} else {
 					error(param);
 				}
 			}
-			
-		}.execute();
+
+		}
+				.execute();
 	}
-	
+
 	abstract protected void success(String ticket);
 	abstract protected void error(String message);
-	
 }
diff --git a/platform/android/java/src/org/godotengine/godot/payments/GenericConsumeTask.java b/platform/android/java/src/org/godotengine/godot/payments/GenericConsumeTask.java
index 0afe35510c..31f6396738 100644
--- a/platform/android/java/src/org/godotengine/godot/payments/GenericConsumeTask.java
+++ b/platform/android/java/src/org/godotengine/godot/payments/GenericConsumeTask.java
@@ -36,15 +36,12 @@ import android.os.AsyncTask;
 import android.os.RemoteException;
 import android.util.Log;
 
-abstract public class GenericConsumeTask extends AsyncTask<String, String, String>{
+abstract public class GenericConsumeTask extends AsyncTask<String, String, String> {
 
 	private Context context;
 	private IInAppBillingService mService;
 
-	
-	
-	
-	public GenericConsumeTask(Context context, IInAppBillingService mService, String sku, String receipt, String signature, String token){
+	public GenericConsumeTask(Context context, IInAppBillingService mService, String sku, String receipt, String signature, String token) {
 		this.context = context;
 		this.mService = mService;
 		this.sku = sku;
@@ -52,19 +49,19 @@ abstract public class GenericConsumeTask extends AsyncTask<String, String, Strin
 		this.signature = signature;
 		this.token = token;
 	}
-	
+
 	private String sku;
 	private String receipt;
 	private String signature;
 	private String token;
-	
+
 	@Override
 	protected String doInBackground(String... params) {
 		try {
 			//Log.d("godot", "Requesting to consume an item with token ." + token);
 			int response = mService.consumePurchase(3, context.getPackageName(), token);
 			//Log.d("godot", "consumePurchase response: " + response);
-			if(response == 0 || response == 8){
+			if (response == 0 || response == 8) {
 				return null;
 			}
 		} catch (Exception e) {
@@ -72,11 +69,10 @@ abstract public class GenericConsumeTask extends AsyncTask<String, String, Strin
 		}
 		return null;
 	}
-	
-	protected void onPostExecute(String sarasa){
+
+	protected void onPostExecute(String sarasa) {
 		onSuccess(sku, receipt, signature, token);
 	}
-	
-	abstract public void onSuccess(String sku, String receipt, String signature, String token);
 
+	abstract public void onSuccess(String sku, String receipt, String signature, String token);
 }
diff --git a/platform/android/java/src/org/godotengine/godot/payments/HandlePurchaseTask.java b/platform/android/java/src/org/godotengine/godot/payments/HandlePurchaseTask.java
index 7318ae2fc6..80f53d16c8 100644
--- a/platform/android/java/src/org/godotengine/godot/payments/HandlePurchaseTask.java
+++ b/platform/android/java/src/org/godotengine/godot/payments/HandlePurchaseTask.java
@@ -50,28 +50,26 @@ import android.util.Log;
 abstract public class HandlePurchaseTask {
 
 	private Activity context;
-	
-	public HandlePurchaseTask(Activity context ){
+
+	public HandlePurchaseTask(Activity context) {
 		this.context = context;
 	}
-	
-	
-	public void handlePurchaseRequest(int resultCode, Intent data){
+
+	public void handlePurchaseRequest(int resultCode, Intent data) {
 		//Log.d("XXX", "Handling purchase response");
 		//int responseCode = data.getIntExtra("RESPONSE_CODE", 0);
 		PaymentsCache pc = new PaymentsCache(context);
-		
+
 		String purchaseData = data.getStringExtra("INAPP_PURCHASE_DATA");
 		//Log.d("XXX", "Purchase data:" + purchaseData);
 		String dataSignature = data.getStringExtra("INAPP_DATA_SIGNATURE");
 		//Log.d("XXX", "Purchase signature:" + dataSignature);
-		
+
 		if (resultCode == Activity.RESULT_OK) {
-			
+
 			try {
 				//Log.d("SARLANGA", purchaseData);
-				
-				
+
 				JSONObject jo = new JSONObject(purchaseData);
 				//String sku = jo.getString("productId");
 				//alert("You have bought the " + sku + ". Excellent choice, aventurer!");
@@ -82,8 +80,8 @@ abstract public class HandlePurchaseTask {
 				//Integer state = jo.getInt("purchaseState");
 				String developerPayload = jo.getString("developerPayload");
 				String purchaseToken = jo.getString("purchaseToken");
-				
-				if(! pc.getConsumableValue("validation_hash", productId).equals(developerPayload) ) {
+
+				if (!pc.getConsumableValue("validation_hash", productId).equals(developerPayload)) {
 					error("Untrusted callback");
 					return;
 				}
@@ -92,13 +90,13 @@ abstract public class HandlePurchaseTask {
 				pc.setConsumableValue("ticket", productId, purchaseData);
 				pc.setConsumableFlag("block", productId, true);
 				pc.setConsumableValue("token", productId, purchaseToken);
-				
+
 				success(productId, dataSignature, purchaseData);
 				return;
-			}	catch (JSONException e) {
+			} catch (JSONException e) {
 				error(e.getMessage());
 			}
-		}else if( resultCode == Activity.RESULT_CANCELED){
+		} else if (resultCode == Activity.RESULT_CANCELED) {
 			canceled();
 		}
 	}
@@ -106,6 +104,4 @@ abstract public class HandlePurchaseTask {
 	abstract protected void success(String sku, String signature, String ticket);
 	abstract protected void error(String message);
 	abstract protected void canceled();
-
-	
 }
diff --git a/platform/android/java/src/org/godotengine/godot/payments/PaymentsCache.java b/platform/android/java/src/org/godotengine/godot/payments/PaymentsCache.java
index 69ac02e902..f9828ef77d 100644
--- a/platform/android/java/src/org/godotengine/godot/payments/PaymentsCache.java
+++ b/platform/android/java/src/org/godotengine/godot/payments/PaymentsCache.java
@@ -34,41 +34,38 @@ import android.content.SharedPreferences;
 import android.util.Log;
 
 public class PaymentsCache {
-	
+
 	public Context context;
 
-	public PaymentsCache(Context context){
+	public PaymentsCache(Context context) {
 		this.context = context;
 	}
-	
-	
-	public void setConsumableFlag(String set, String sku, Boolean flag){
-		SharedPreferences sharedPref = context.getSharedPreferences("consumables_" + set, Context.MODE_PRIVATE); 
-	    SharedPreferences.Editor editor = sharedPref.edit();
-	    editor.putBoolean(sku, flag);
-	    editor.commit();
-}
 
-	public boolean getConsumableFlag(String set, String sku){
-	    SharedPreferences sharedPref = context.getSharedPreferences(
-	    		"consumables_" + set, Context.MODE_PRIVATE);
-	    return sharedPref.getBoolean(sku, false);
+	public void setConsumableFlag(String set, String sku, Boolean flag) {
+		SharedPreferences sharedPref = context.getSharedPreferences("consumables_" + set, Context.MODE_PRIVATE);
+		SharedPreferences.Editor editor = sharedPref.edit();
+		editor.putBoolean(sku, flag);
+		editor.commit();
 	}
 
+	public boolean getConsumableFlag(String set, String sku) {
+		SharedPreferences sharedPref = context.getSharedPreferences(
+				"consumables_" + set, Context.MODE_PRIVATE);
+		return sharedPref.getBoolean(sku, false);
+	}
 
-	public void setConsumableValue(String set, String sku, String value){
-		SharedPreferences sharedPref = context.getSharedPreferences("consumables_" + set, Context.MODE_PRIVATE); 
-	    SharedPreferences.Editor editor = sharedPref.edit();
-	    editor.putString(sku, value);
+	public void setConsumableValue(String set, String sku, String value) {
+		SharedPreferences sharedPref = context.getSharedPreferences("consumables_" + set, Context.MODE_PRIVATE);
+		SharedPreferences.Editor editor = sharedPref.edit();
+		editor.putString(sku, value);
 		//Log.d("XXX", "Setting asset: consumables_" + set + ":" + sku);
-	    editor.commit();
+		editor.commit();
 	}
 
-	public String getConsumableValue(String set, String sku){
-	    SharedPreferences sharedPref = context.getSharedPreferences(
-	    		"consumables_" + set, Context.MODE_PRIVATE);
+	public String getConsumableValue(String set, String sku) {
+		SharedPreferences sharedPref = context.getSharedPreferences(
+				"consumables_" + set, Context.MODE_PRIVATE);
 		//Log.d("XXX", "Getting asset: consumables_" + set + ":" + sku);
-	    return sharedPref.getString(sku, null);
+		return sharedPref.getString(sku, null);
 	}
-
 }
diff --git a/platform/android/java/src/org/godotengine/godot/payments/PaymentsManager.java b/platform/android/java/src/org/godotengine/godot/payments/PaymentsManager.java
index b327265abb..71407566e2 100644
--- a/platform/android/java/src/org/godotengine/godot/payments/PaymentsManager.java
+++ b/platform/android/java/src/org/godotengine/godot/payments/PaymentsManager.java
@@ -106,7 +106,6 @@ public class PaymentsManager {
 			@Override
 			protected void error(String message) {
 				godotPaymentV3.callbackFail();
-
 			}
 
 			@Override
@@ -119,8 +118,8 @@ public class PaymentsManager {
 				godotPaymentV3.callbackAlreadyOwned(sku);
 			}
 
-		}.purchase(sku, transactionId);
-
+		}
+				.purchase(sku, transactionId);
 	}
 
 	public void consumeUnconsumedPurchases() {
@@ -135,16 +134,15 @@ public class PaymentsManager {
 			protected void error(String message) {
 				Log.d("godot", "consumeUnconsumedPurchases :" + message);
 				godotPaymentV3.callbackFailConsume();
-
 			}
 
 			@Override
 			protected void notRequired() {
 				Log.d("godot", "callbackSuccessNoUnconsumedPurchases :");
 				godotPaymentV3.callbackSuccessNoUnconsumedPurchases();
-
 			}
-		}.consumeItAll();
+		}
+				.consumeItAll();
 	}
 
 	public void requestPurchased() {
@@ -210,9 +208,9 @@ public class PaymentsManager {
 						@Override
 						protected void error(String message) {
 							godotPaymentV3.callbackFail();
-
 						}
-					}.consume(sku);
+					}
+							.consume(sku);
 				}
 			}
 
@@ -225,7 +223,8 @@ public class PaymentsManager {
 			protected void canceled() {
 				godotPaymentV3.callbackCancel();
 			}
-		}.handlePurchaseRequest(resultCode, data);
+		}
+				.handlePurchaseRequest(resultCode, data);
 	}
 
 	public void validatePurchase(String purchaseToken, final String sku) {
@@ -246,8 +245,8 @@ public class PaymentsManager {
 					protected void error(String message) {
 						godotPaymentV3.callbackFail();
 					}
-				}.consume(sku);
-
+				}
+						.consume(sku);
 			}
 
 			@Override
@@ -259,7 +258,8 @@ public class PaymentsManager {
 			protected void canceled() {
 				godotPaymentV3.callbackCancel();
 			}
-		}.validatePurchase(sku);
+		}
+				.validatePurchase(sku);
 	}
 
 	public void setAutoConsume(boolean autoConsume) {
@@ -278,7 +278,8 @@ public class PaymentsManager {
 			protected void error(String message) {
 				godotPaymentV3.callbackFailConsume();
 			}
-		}.consume(sku);
+		}
+				.consume(sku);
 	}
 
 	// Workaround to bug where sometimes response codes come as Long instead of Integer
@@ -287,8 +288,10 @@ public class PaymentsManager {
 		if (o == null) {
 			//logDebug("Bundle with null response code, assuming OK (known issue)");
 			return BILLING_RESPONSE_RESULT_OK;
-		} else if (o instanceof Integer) return ((Integer) o).intValue();
-		else if (o instanceof Long) return (int) ((Long) o).longValue();
+		} else if (o instanceof Integer)
+			return ((Integer)o).intValue();
+		else if (o instanceof Long)
+			return (int)((Long)o).longValue();
 		else {
 			//logError("Unexpected type for bundle response code.");
 			//logError(o.getClass().getName());
@@ -304,25 +307,41 @@ public class PaymentsManager {
 	 * It also includes the result code numerically.
 	 */
 	public static String getResponseDesc(int code) {
-		String[] iab_msgs = ("0:OK/1:User Canceled/2:Unknown/" +
-				"3:Billing Unavailable/4:Item unavailable/" +
-				"5:Developer Error/6:Error/7:Item Already Owned/" +
-				"8:Item not owned").split("/");
-		String[] iabhelper_msgs = ("0:OK/-1001:Remote exception during initialization/" +
-				"-1002:Bad response received/" +
-				"-1003:Purchase signature verification failed/" +
-				"-1004:Send intent failed/" +
-				"-1005:User cancelled/" +
-				"-1006:Unknown purchase response/" +
-				"-1007:Missing token/" +
-				"-1008:Unknown error/" +
-				"-1009:Subscriptions not available/" +
-				"-1010:Invalid consumption attempt").split("/");
+		String[] iab_msgs = ("0:OK/1:User Canceled/2:Unknown/"
+							 +
+							 "3:Billing Unavailable/4:Item unavailable/"
+							 +
+							 "5:Developer Error/6:Error/7:Item Already Owned/"
+							 +
+							 "8:Item not owned")
+									.split("/");
+		String[] iabhelper_msgs = ("0:OK/-1001:Remote exception during initialization/"
+								   +
+								   "-1002:Bad response received/"
+								   +
+								   "-1003:Purchase signature verification failed/"
+								   +
+								   "-1004:Send intent failed/"
+								   +
+								   "-1005:User cancelled/"
+								   +
+								   "-1006:Unknown purchase response/"
+								   +
+								   "-1007:Missing token/"
+								   +
+								   "-1008:Unknown error/"
+								   +
+								   "-1009:Subscriptions not available/"
+								   +
+								   "-1010:Invalid consumption attempt")
+										  .split("/");
 
 		if (code <= -1000) {
 			int index = -1000 - code;
-			if (index >= 0 && index < iabhelper_msgs.length) return iabhelper_msgs[index];
-			else return String.valueOf(code) + ":Unknown IAB Helper Error";
+			if (index >= 0 && index < iabhelper_msgs.length)
+				return iabhelper_msgs[index];
+			else
+				return String.valueOf(code) + ":Unknown IAB Helper Error";
 		} else if (code < 0 || code >= iab_msgs.length)
 			return String.valueOf(code) + ":Unknown";
 		else
@@ -375,7 +394,7 @@ public class PaymentsManager {
 						ArrayList<String> responseList = skuDetails.getStringArrayList("DETAILS_LIST");
 
 						for (String thisResponse : responseList) {
-							Log.d("godot", "response = "+thisResponse);
+							Log.d("godot", "response = " + thisResponse);
 							godotPaymentV3.addSkuDetail(thisResponse);
 						}
 					} catch (RemoteException e) {
@@ -385,7 +404,8 @@ public class PaymentsManager {
 				}
 				godotPaymentV3.completeSkuDetail();
 			}
-		})).start();
+		}))
+				.start();
 	}
 
 	private GodotPaymentV3 godotPaymentV3;
@@ -393,5 +413,4 @@ public class PaymentsManager {
 	public void setBaseSingleton(GodotPaymentV3 godotPaymentV3) {
 		this.godotPaymentV3 = godotPaymentV3;
 	}
-
 }
diff --git a/platform/android/java/src/org/godotengine/godot/payments/PurchaseTask.java b/platform/android/java/src/org/godotengine/godot/payments/PurchaseTask.java
index 6ecea0106c..e2f08345ad 100644
--- a/platform/android/java/src/org/godotengine/godot/payments/PurchaseTask.java
+++ b/platform/android/java/src/org/godotengine/godot/payments/PurchaseTask.java
@@ -50,17 +50,16 @@ import android.util.Log;
 abstract public class PurchaseTask {
 
 	private Activity context;
-	
+
 	private IInAppBillingService mService;
-	public PurchaseTask(IInAppBillingService mService, Activity context ){
+	public PurchaseTask(IInAppBillingService mService, Activity context) {
 		this.context = context;
 		this.mService = mService;
 	}
-	
 
 	private boolean isLooping = false;
-	
-	public void purchase(final String sku, final String transactionId){
+
+	public void purchase(final String sku, final String transactionId) {
 		Log.d("XXX", "Starting purchase for: " + sku);
 		PaymentsCache pc = new PaymentsCache(context);
 		Boolean isBlocked = pc.getConsumableFlag("block", sku);
@@ -75,7 +74,7 @@ abstract public class PurchaseTask {
 
 		Bundle buyIntentBundle;
 		try {
-			buyIntentBundle = mService.getBuyIntent(3, context.getApplicationContext().getPackageName(), sku, "inapp", hash  );
+			buyIntentBundle = mService.getBuyIntent(3, context.getApplicationContext().getPackageName(), sku, "inapp", hash);
 		} catch (RemoteException e) {
 			//Log.d("XXX", "Error: " + e.getMessage());
 			error(e.getMessage());
@@ -83,50 +82,45 @@ abstract public class PurchaseTask {
 		}
 		Object rc = buyIntentBundle.get("RESPONSE_CODE");
 		int responseCode = 0;
-		if(rc == null){
+		if (rc == null) {
 			responseCode = PaymentsManager.BILLING_RESPONSE_RESULT_OK;
-		}else if( rc instanceof Integer){
+		} else if (rc instanceof Integer) {
 			responseCode = ((Integer)rc).intValue();
-		}else if( rc instanceof Long){
+		} else if (rc instanceof Long) {
 			responseCode = (int)((Long)rc).longValue();
 		}
 		//Log.d("XXX", "Buy intent response code: " + responseCode);
-		if(responseCode == 1 || responseCode == 3 || responseCode == 4){
+		if (responseCode == 1 || responseCode == 3 || responseCode == 4) {
 			canceled();
 			return;
 		}
-		if(responseCode == 7){
+		if (responseCode == 7) {
 			alreadyOwned();
 			return;
 		}
-			
-		
+
 		PendingIntent pendingIntent = buyIntentBundle.getParcelable("BUY_INTENT");
 		pc.setConsumableValue("validation_hash", sku, hash);
 		try {
-			if(context == null){
+			if (context == null) {
 				//Log.d("XXX", "No context!");
 			}
-			if(pendingIntent == null){
+			if (pendingIntent == null) {
 				//Log.d("XXX", "No pending intent");
 			}
 			//Log.d("XXX", "Starting activity for purchase!");
 			context.startIntentSenderForResult(
 					pendingIntent.getIntentSender(),
-					PaymentsManager.REQUEST_CODE_FOR_PURCHASE, 
-					new Intent(), 
+					PaymentsManager.REQUEST_CODE_FOR_PURCHASE,
+					new Intent(),
 					Integer.valueOf(0), Integer.valueOf(0),
-					   Integer.valueOf(0));
+					Integer.valueOf(0));
 		} catch (SendIntentException e) {
 			error(e.getMessage());
 		}
-		
-		
-		
 	}
 
 	abstract protected void error(String message);
 	abstract protected void canceled();
 	abstract protected void alreadyOwned();
-	
 }
diff --git a/platform/android/java/src/org/godotengine/godot/payments/ReleaseAllConsumablesTask.java b/platform/android/java/src/org/godotengine/godot/payments/ReleaseAllConsumablesTask.java
index d831e45694..765906d0bb 100644
--- a/platform/android/java/src/org/godotengine/godot/payments/ReleaseAllConsumablesTask.java
+++ b/platform/android/java/src/org/godotengine/godot/payments/ReleaseAllConsumablesTask.java
@@ -48,69 +48,63 @@ abstract public class ReleaseAllConsumablesTask {
 
 	private Context context;
 	private IInAppBillingService mService;
-	
-	public ReleaseAllConsumablesTask(IInAppBillingService mService, Context context ){
+
+	public ReleaseAllConsumablesTask(IInAppBillingService mService, Context context) {
 		this.context = context;
 		this.mService = mService;
 	}
-	
 
-	public void consumeItAll(){
-		try{
+	public void consumeItAll() {
+		try {
 			//Log.d("godot", "consumeItall for " + context.getPackageName());
-			Bundle bundle = mService.getPurchases(3, context.getPackageName(), "inapp",null);
-			
+			Bundle bundle = mService.getPurchases(3, context.getPackageName(), "inapp", null);
+
 			for (String key : bundle.keySet()) {
 				Object value = bundle.get(key);
 				//Log.d("godot", String.format("%s %s (%s)", key,
 				//value.toString(), value.getClass().getName()));
 			}
-			
-			
-			if (bundle.getInt("RESPONSE_CODE") == 0){
+
+			if (bundle.getInt("RESPONSE_CODE") == 0) {
 
 				final ArrayList<String> myPurchases = bundle.getStringArrayList("INAPP_PURCHASE_DATA_LIST");
 				final ArrayList<String> mySignatures = bundle.getStringArrayList("INAPP_DATA_SIGNATURE_LIST");
-				
 
-				if (myPurchases == null || myPurchases.size() == 0){
+				if (myPurchases == null || myPurchases.size() == 0) {
 					//Log.d("godot", "No purchases!");
 					notRequired();
 					return;
 				}
-		
-				
+
 				//Log.d("godot", "# products to be consumed:" + myPurchases.size());
-				for (int i=0;i<myPurchases.size();i++)
-				{
-					
-					try{
+				for (int i = 0; i < myPurchases.size(); i++) {
+
+					try {
 						String receipt = myPurchases.get(i);
 						JSONObject inappPurchaseData = new JSONObject(receipt);
 						String sku = inappPurchaseData.getString("productId");
 						String token = inappPurchaseData.getString("purchaseToken");
 						String signature = mySignatures.get(i);
 						//Log.d("godot", "A punto de consumir un item con token:" + token + "\n" + receipt);
-						new GenericConsumeTask(context, mService, sku, receipt,signature, token) {
-							
+						new GenericConsumeTask(context, mService, sku, receipt, signature, token) {
+
 							@Override
 							public void onSuccess(String sku, String receipt, String signature, String token) {
 								ReleaseAllConsumablesTask.this.success(sku, receipt, signature, token);
 							}
-						}.execute();
-						
+						}
+								.execute();
+
 					} catch (JSONException e) {
 					}
 				}
-
 			}
-		}catch(Exception e){
+		} catch (Exception e) {
 			Log.d("godot", "Error releasing products:" + e.getClass().getName() + ":" + e.getMessage());
 		}
 	}
-	
+
 	abstract protected void success(String sku, String receipt, String signature, String token);
 	abstract protected void error(String message);
 	abstract protected void notRequired();
-	
 }
diff --git a/platform/android/java/src/org/godotengine/godot/payments/ValidateTask.java b/platform/android/java/src/org/godotengine/godot/payments/ValidateTask.java
index 6e058c140c..3e62ef282a 100644
--- a/platform/android/java/src/org/godotengine/godot/payments/ValidateTask.java
+++ b/platform/android/java/src/org/godotengine/godot/payments/ValidateTask.java
@@ -55,22 +55,21 @@ abstract public class ValidateTask {
 
 	private Activity context;
 	private GodotPaymentV3 godotPaymentsV3;
-	public ValidateTask(Activity context, GodotPaymentV3 godotPaymentsV3){
+	public ValidateTask(Activity context, GodotPaymentV3 godotPaymentsV3) {
 		this.context = context;
 		this.godotPaymentsV3 = godotPaymentsV3;
 	}
-	
-	public void validatePurchase(final String sku){
-		new AsyncTask<String, String, String>(){
 
-			
+	public void validatePurchase(final String sku) {
+		new AsyncTask<String, String, String>() {
+
 			private ProgressDialog dialog;
 
 			@Override
-			protected void onPreExecute(){
+			protected void onPreExecute() {
 				dialog = ProgressDialog.show(context, null, "Please wait...");
 			}
-			
+
 			@Override
 			protected String doInBackground(String... params) {
 				PaymentsCache pc = new PaymentsCache(context);
@@ -90,37 +89,34 @@ abstract public class ValidateTask {
 				//Log.d("XXX", "Validation response:\n"+jsonResponse);
 				return jsonResponse;
 			}
-			
+
 			@Override
-			protected void onPostExecute(String response){
-				if(dialog != null){
+			protected void onPostExecute(String response) {
+				if (dialog != null) {
 					dialog.dismiss();
 				}
 				JSONObject j;
 				try {
 					j = new JSONObject(response);
-					if(j.getString("status").equals("OK")){
+					if (j.getString("status").equals("OK")) {
 						success();
 						return;
-					}else if(j.getString("status") != null){
+					} else if (j.getString("status") != null) {
 						error(j.getString("message"));
-					}else{
+					} else {
 						error("Connection error");
 					}
 				} catch (JSONException e) {
 					error(e.getMessage());
-				}catch (Exception e){
+				} catch (Exception e) {
 					error(e.getMessage());
 				}
-
-				
 			}
-			
-		}.execute();
+
+		}
+				.execute();
 	}
 	abstract protected void success();
 	abstract protected void error(String message);
 	abstract protected void canceled();
-
-	
 }
diff --git a/platform/android/java/src/org/godotengine/godot/utils/Crypt.java b/platform/android/java/src/org/godotengine/godot/utils/Crypt.java
index 2fd66553f6..35e4e430a4 100644
--- a/platform/android/java/src/org/godotengine/godot/utils/Crypt.java
+++ b/platform/android/java/src/org/godotengine/godot/utils/Crypt.java
@@ -34,34 +34,34 @@ import java.util.Random;
 
 public class Crypt {
 
-	public static String md5(String input){
-        try {
-            // Create MD5 Hash
-            MessageDigest digest = java.security.MessageDigest.getInstance("MD5");
-            digest.update(input.getBytes());
-            byte messageDigest[] = digest.digest();
-            
-            // Create Hex String
-            StringBuffer hexString = new StringBuffer();
-            for (int i=0; i<messageDigest.length; i++)
-                hexString.append(Integer.toHexString(0xFF & messageDigest[i]));
-            return hexString.toString();
-            
-        } catch (Exception e) {
-            e.printStackTrace();
-        }   
-        return "";
+	public static String md5(String input) {
+		try {
+			// MD5 digest of the input's bytes (getBytes() uses the platform default charset)
+			MessageDigest digest = java.security.MessageDigest.getInstance("MD5");
+			digest.update(input.getBytes());
+			byte messageDigest[] = digest.digest();
+
+			// Two hex digits per byte: Integer.toHexString alone drops the leading zero of bytes below 0x10
+			StringBuffer hexString = new StringBuffer();
+			for (int i = 0; i < messageDigest.length; i++)
+				hexString.append(String.format("%02x", 0xFF & messageDigest[i]));
+			return hexString.toString();
+
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+		return "";
 	}
-	
-	public static String createRandomHash(){
+
+	public static String createRandomHash() {
 		return md5(Long.toString(createRandomLong()));
 	}
-	
-	public static long createAbsRandomLong(){
+
+	public static long createAbsRandomLong() {
 		return Math.abs(createRandomLong());
 	}
-	
-	public static long createRandomLong(){
+
+	public static long createRandomLong() {
 		Random r = new Random();
 		return r.nextLong();
 	}
diff --git a/platform/android/java/src/org/godotengine/godot/utils/CustomSSLSocketFactory.java b/platform/android/java/src/org/godotengine/godot/utils/CustomSSLSocketFactory.java
index 3fc8c48397..bfcf7e3b2a 100644
--- a/platform/android/java/src/org/godotengine/godot/utils/CustomSSLSocketFactory.java
+++ b/platform/android/java/src/org/godotengine/godot/utils/CustomSSLSocketFactory.java
@@ -42,30 +42,29 @@ import javax.net.ssl.TrustManagerFactory;
 
 import org.apache.http.conn.ssl.SSLSocketFactory;
 
-
 /**
  * 
  * @author Luis Linietsky <luis.linietsky@gmail.com>
  */
 public class CustomSSLSocketFactory extends SSLSocketFactory {
-    SSLContext sslContext = SSLContext.getInstance("TLS");
+	SSLContext sslContext = SSLContext.getInstance("TLS");
 
-    public CustomSSLSocketFactory(KeyStore truststore) throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException, UnrecoverableKeyException {
-        super(truststore);
+	public CustomSSLSocketFactory(KeyStore truststore) throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException, UnrecoverableKeyException {
+		super(truststore);
 
-        TrustManagerFactory tmf = TrustManagerFactory.getInstance("X509");
-        tmf.init(truststore);
+		TrustManagerFactory tmf = TrustManagerFactory.getInstance("X509");
+		tmf.init(truststore);
 
-        sslContext.init(null, tmf.getTrustManagers(), null);
-    }
+		sslContext.init(null, tmf.getTrustManagers(), null);
+	}
 
-    @Override
-    public Socket createSocket(Socket socket, String host, int port, boolean autoClose) throws IOException, UnknownHostException {
-        return sslContext.getSocketFactory().createSocket(socket, host, port, autoClose);
-    }
+	@Override
+	public Socket createSocket(Socket socket, String host, int port, boolean autoClose) throws IOException, UnknownHostException {
+		return sslContext.getSocketFactory().createSocket(socket, host, port, autoClose);
+	}
 
-    @Override
-    public Socket createSocket() throws IOException {
-        return sslContext.getSocketFactory().createSocket();
-    }
+	@Override
+	public Socket createSocket() throws IOException {
+		return sslContext.getSocketFactory().createSocket();
+	}
 }
diff --git a/platform/android/java/src/org/godotengine/godot/utils/HttpRequester.java b/platform/android/java/src/org/godotengine/godot/utils/HttpRequester.java
index 0711f30b8b..81a642af9f 100644
--- a/platform/android/java/src/org/godotengine/godot/utils/HttpRequester.java
+++ b/platform/android/java/src/org/godotengine/godot/utils/HttpRequester.java
@@ -63,7 +63,6 @@ import org.apache.http.params.HttpProtocolParams;
 import org.apache.http.protocol.HTTP;
 import org.apache.http.util.EntityUtils;
 
-
 import android.content.Context;
 import android.content.SharedPreferences;
 import android.util.Log;
@@ -73,155 +72,154 @@ import android.util.Log;
  * @author Luis Linietsky <luis.linietsky@gmail.com>
  */
 public class HttpRequester {
-	
+
 	private Context context;
-	private static final int TTL = 600000; // 10 minutos 
-	private long cttl=0;
-	
-	public HttpRequester(){
+	private static final int TTL = 600000; // 10 minutos
+	private long cttl = 0;
+
+	public HttpRequester() {
 		//Log.d("XXX", "Creando http request sin contexto");
 	}
-	
-	public HttpRequester(Context context){
-		this.context=context;
+
+	public HttpRequester(Context context) {
+		this.context = context;
 		//Log.d("XXX", "Creando http request con contexto");
 	}
-	
-	public String post(RequestParams params){
-	    HttpPost httppost = new HttpPost(params.getUrl());
-        try {
+
+	public String post(RequestParams params) {
+		HttpPost httppost = new HttpPost(params.getUrl());
+		try {
 			httppost.setEntity(new UrlEncodedFormEntity(params.toPairsList()));
 			return request(httppost);
 		} catch (UnsupportedEncodingException e) {
 			return null;
 		}
 	}
-	
-	public String get(RequestParams params){
+
+	public String get(RequestParams params) {
 		String response = getResponseFromCache(params.getUrl());
-		if(response == null){
+		if (response == null) {
 			//Log.d("XXX", "Cache miss!");
-		    HttpGet httpget = new HttpGet(params.getUrl());
-		    long timeInit = new Date().getTime();
-		    response = request(httpget);
-		    long delay = new Date().getTime() - timeInit;
-		    Log.d("com.app11tt.android.utils.HttpRequest::get(url)", "Url: " + params.getUrl() + " downloaded in " + String.format("%.03f", delay/1000.0f) + " seconds");
-		    if(response == null || response.length() == 0){
-		    	response = "";
-		    }else{
-		    	saveResponseIntoCache(params.getUrl(), response);
-		    } 
+			HttpGet httpget = new HttpGet(params.getUrl());
+			long timeInit = new Date().getTime();
+			response = request(httpget);
+			long delay = new Date().getTime() - timeInit;
+			Log.d("com.app11tt.android.utils.HttpRequest::get(url)", "Url: " + params.getUrl() + " downloaded in " + String.format("%.03f", delay / 1000.0f) + " seconds");
+			if (response == null || response.length() == 0) {
+				response = "";
+			} else {
+				saveResponseIntoCache(params.getUrl(), response);
+			}
 		}
 		Log.d("XXX", "Req: " + params.getUrl());
 		Log.d("XXX", "Resp: " + response);
-	    return response;
+		return response;
 	}
-	
-	private String request(HttpUriRequest request){
+
+	private String request(HttpUriRequest request) {
 		//Log.d("XXX", "Haciendo request a: " + request.getURI() );
-		Log.d("PPP", "Haciendo request a: " + request.getURI() );
+		Log.d("PPP", "Haciendo request a: " + request.getURI());
 		long init = new Date().getTime();
 		HttpClient httpclient = getNewHttpClient();
 		HttpParams httpParameters = httpclient.getParams();
 		HttpConnectionParams.setConnectionTimeout(httpParameters, 0);
 		HttpConnectionParams.setSoTimeout(httpParameters, 0);
 		HttpConnectionParams.setTcpNoDelay(httpParameters, true);
-	    try {
-	        HttpResponse response = httpclient.execute(request);
-	        Log.d("PPP", "Fin de request (" + (new Date().getTime() - init) + ") a: " + request.getURI() );
+		try {
+			HttpResponse response = httpclient.execute(request);
+			Log.d("PPP", "Fin de request (" + (new Date().getTime() - init) + ") a: " + request.getURI());
 			//Log.d("XXX1", "Status:" + response.getStatusLine().toString());
-	        if(response.getStatusLine().getStatusCode() == 200){
-	        	String strResponse = EntityUtils.toString(response.getEntity());
+			if (response.getStatusLine().getStatusCode() == 200) {
+				String strResponse = EntityUtils.toString(response.getEntity());
 				//Log.d("XXX2", strResponse);
-	        	return strResponse;
-	        }else{
-	        	Log.d("XXX3", "Response status code:" + response.getStatusLine().getStatusCode() + "\n" + EntityUtils.toString(response.getEntity()));
-	        	return null;
-	        }
-	        
-	    } catch (ClientProtocolException e) {
-	    	Log.d("XXX3", e.getMessage());
-	    } catch (IOException e) {
-	    	Log.d("XXX4", e.getMessage());
-	    }
+				return strResponse;
+			} else {
+				Log.d("XXX3", "Response status code:" + response.getStatusLine().getStatusCode() + "\n" + EntityUtils.toString(response.getEntity()));
+				return null;
+			}
+
+		} catch (ClientProtocolException e) {
+			Log.d("XXX3", e.getMessage());
+		} catch (IOException e) {
+			Log.d("XXX4", e.getMessage());
+		}
 		return null;
 	}
-	
+
 	private HttpClient getNewHttpClient() {
-	    try {
-	        KeyStore trustStore = KeyStore.getInstance(KeyStore.getDefaultType());
-	        trustStore.load(null, null);
+		try {
+			KeyStore trustStore = KeyStore.getInstance(KeyStore.getDefaultType());
+			trustStore.load(null, null);
 
-	        SSLSocketFactory sf = new CustomSSLSocketFactory(trustStore);
-	        sf.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
+			SSLSocketFactory sf = new CustomSSLSocketFactory(trustStore);
+			sf.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
 
-	        HttpParams params = new BasicHttpParams();
-	        HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
-	        HttpProtocolParams.setContentCharset(params, HTTP.UTF_8);
+			HttpParams params = new BasicHttpParams();
+			HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
+			HttpProtocolParams.setContentCharset(params, HTTP.UTF_8);
 
-	        SchemeRegistry registry = new SchemeRegistry();
-	        registry.register(new Scheme("http", PlainSocketFactory.getSocketFactory(), 80));
-	        registry.register(new Scheme("https", sf, 443));
+			SchemeRegistry registry = new SchemeRegistry();
+			registry.register(new Scheme("http", PlainSocketFactory.getSocketFactory(), 80));
+			registry.register(new Scheme("https", sf, 443));
 
-	        ClientConnectionManager ccm = new ThreadSafeClientConnManager(params, registry);
+			ClientConnectionManager ccm = new ThreadSafeClientConnManager(params, registry);
 
-	        return new DefaultHttpClient(ccm, params);
-	    } catch (Exception e) {
-	        return new DefaultHttpClient();
-	    }
+			return new DefaultHttpClient(ccm, params);
+		} catch (Exception e) {
+			return new DefaultHttpClient();
+		}
 	}
-	
+
 	private static String convertStreamToString(InputStream is) {
-	    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
-	    StringBuilder sb = new StringBuilder();
-	    String line = null;
-	    try {
-	        while ((line = reader.readLine()) != null) {
-	            sb.append((line + "\n"));
-	        }
-	    } catch (IOException e) {
-	        e.printStackTrace();
-	    } finally {
-	        try {
-	            is.close(); 
-	        } catch (IOException e) {
-	            e.printStackTrace(); 
-	        }
-	    }
-	    return sb.toString();
+		BufferedReader reader = new BufferedReader(new InputStreamReader(is));
+		StringBuilder sb = new StringBuilder();
+		String line = null;
+		try {
+			while ((line = reader.readLine()) != null) {
+				sb.append((line + "\n"));
+			}
+		} catch (IOException e) {
+			e.printStackTrace();
+		} finally {
+			try {
+				is.close();
+			} catch (IOException e) {
+				e.printStackTrace();
+			}
+		}
+		return sb.toString();
 	}
 
-	public void saveResponseIntoCache(String request, String response){
-		if(context == null){
+	public void saveResponseIntoCache(String request, String response) {
+		if (context == null) {
 			//Log.d("XXX", "No context, cache failed!");
 			return;
 		}
-        SharedPreferences sharedPref = context.getSharedPreferences("http_get_cache", Context.MODE_PRIVATE); 
-        SharedPreferences.Editor editor = sharedPref.edit();
-        editor.putString("request_" + Crypt.md5(request), response);
-        editor.putLong("request_" + Crypt.md5(request) + "_ttl", new Date().getTime() + getTtl());
-        editor.commit();
+		SharedPreferences sharedPref = context.getSharedPreferences("http_get_cache", Context.MODE_PRIVATE);
+		SharedPreferences.Editor editor = sharedPref.edit();
+		editor.putString("request_" + Crypt.md5(request), response);
+		editor.putLong("request_" + Crypt.md5(request) + "_ttl", new Date().getTime() + getTtl());
+		editor.commit();
 	}
-	
-	
-	public String getResponseFromCache(String request){
-		if(context == null){
+
+	public String getResponseFromCache(String request) {
+		if (context == null) {
 			Log.d("XXX", "No context, cache miss");
 			return null;
 		}
-        SharedPreferences sharedPref = context.getSharedPreferences( "http_get_cache", Context.MODE_PRIVATE);
-        long ttl = getResponseTtl(request);
-        if(ttl == 0l || (new Date().getTime() - ttl) > 0l){
-        	Log.d("XXX", "Cache invalid ttl:" + ttl + " vs now:" + new Date().getTime());
-        	return null;
-        }
-        return sharedPref.getString("request_" + Crypt.md5(request), null);
+		SharedPreferences sharedPref = context.getSharedPreferences("http_get_cache", Context.MODE_PRIVATE);
+		long ttl = getResponseTtl(request);
+		if (ttl == 0l || (new Date().getTime() - ttl) > 0l) {
+			Log.d("XXX", "Cache invalid ttl:" + ttl + " vs now:" + new Date().getTime());
+			return null;
+		}
+		return sharedPref.getString("request_" + Crypt.md5(request), null);
 	}
 
-	public long getResponseTtl(String request){
-        SharedPreferences sharedPref = context.getSharedPreferences(
-        		"http_get_cache", Context.MODE_PRIVATE); 
-        return sharedPref.getLong("request_" + Crypt.md5(request) + "_ttl", 0l);
+	public long getResponseTtl(String request) {
+		SharedPreferences sharedPref = context.getSharedPreferences(
+				"http_get_cache", Context.MODE_PRIVATE);
+		return sharedPref.getLong("request_" + Crypt.md5(request) + "_ttl", 0l);
 	}
 
 	public long getTtl() {
@@ -229,7 +227,6 @@ public class HttpRequester {
 	}
 
 	public void setTtl(long ttl) {
-		this.cttl = (ttl*1000) + new Date().getTime();
+		this.cttl = (ttl * 1000) + new Date().getTime();
 	}
-	
 }
diff --git a/platform/android/java/src/org/godotengine/godot/utils/RequestParams.java b/platform/android/java/src/org/godotengine/godot/utils/RequestParams.java
index ded11550bb..051fed5500 100644
--- a/platform/android/java/src/org/godotengine/godot/utils/RequestParams.java
+++ b/platform/android/java/src/org/godotengine/godot/utils/RequestParams.java
@@ -43,33 +43,33 @@ import org.apache.http.message.BasicNameValuePair;
  */
 public class RequestParams {
 
-	private HashMap<String,String> params;
+	private HashMap<String, String> params;
 	private String url;
-	
-	public RequestParams(){
-		params = new HashMap<String,String>();
+
+	public RequestParams() {
+		params = new HashMap<String, String>();
 	}
-	
-	public void put(String key, String value){
+
+	public void put(String key, String value) {
 		params.put(key, value);
 	}
-	
-	public String get(String key){
+
+	public String get(String key) {
 		return params.get(key);
 	}
-	
-	public void remove(Object key){
+
+	public void remove(Object key) {
 		params.remove(key);
 	}
-	
-	public boolean has(String key){
+
+	public boolean has(String key) {
 		return params.containsKey(key);
 	}
-	
-	public List<NameValuePair> toPairsList(){
-		List<NameValuePair>  fields = new ArrayList<NameValuePair>();
 
-		for(String key : params.keySet()){
+	public List<NameValuePair> toPairsList() {
+		List<NameValuePair> fields = new ArrayList<NameValuePair>();
+
+		for (String key : params.keySet()) {
 			fields.add(new BasicNameValuePair(key, this.get(key)));
 		}
 		return fields;
@@ -82,6 +82,4 @@ public class RequestParams {
 	public void setUrl(String url) {
 		this.url = url;
 	}
-
-	
 }
diff --git a/platform/iphone/app_delegate.mm b/platform/iphone/app_delegate.mm
index b591f80aa7..111cdce2de 100644
--- a/platform/iphone/app_delegate.mm
+++ b/platform/iphone/app_delegate.mm
@@ -35,21 +35,6 @@
 #include "main/main.h"
 #include "os_iphone.h"
 
-#ifdef MODULE_FACEBOOKSCORER_IOS_ENABLED
-#include "modules/FacebookScorer_ios/FacebookScorer.h"
-#endif
-
-#ifdef MODULE_GAME_ANALYTICS_ENABLED
-#import "modules/game_analytics/ios/MobileAppTracker.framework/Headers/MobileAppTracker.h"
-//#import "modules/game_analytics/ios/MobileAppTracker.h"
-#import <AdSupport/AdSupport.h>
-#endif
-
-#ifdef MODULE_PARSE_ENABLED
-#import "FBSDKCoreKit/FBSDKCoreKit.h"
-#import <Parse/Parse.h>
-#endif
-
 #import "GameController/GameController.h"
 
 #define kFilteringFactor 0.1
@@ -418,11 +403,7 @@ static int frame_count = 0;
 			OSIPhone::get_singleton()->set_unique_id(String::utf8([uuid UTF8String]));
 
 		}; break;
-		/*
-	case 1: {
-																	++frame_count;
-	}; break;
-*/
+
 		case 1: {
 
 			Main::setup2();
@@ -453,11 +434,7 @@ static int frame_count = 0;
 			}
 
 		}; break;
-		/*
-	case 3: {
-																	++frame_count;
-	}; break;
-*/
+
 		case 2: {
 
 			Main::start();
@@ -558,15 +535,11 @@ static int frame_count = 0;
 };
 
 - (void)applicationDidReceiveMemoryWarning:(UIApplication *)application {
-
-	printf("****************** did receive memory warning!\n");
 	OS::get_singleton()->get_main_loop()->notification(
 			MainLoop::NOTIFICATION_OS_MEMORY_WARNING);
 };
 
 - (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
-
-	printf("**************** app delegate init\n");
 	CGRect rect = [[UIScreen mainScreen] bounds];
 
 	[application setStatusBarHidden:YES withAnimation:UIStatusBarAnimationNone];
@@ -603,7 +576,7 @@ static int frame_count = 0;
 	if (err != 0) {
 		// bail, things did not go very well for us, should probably output a message on screen with our error code...
 		exit(0);
-		return;
+		return FALSE;
 	};
 
 	view_controller = [[ViewController alloc] init];
@@ -641,43 +614,10 @@ static int frame_count = 0;
 	// prevent to stop music in another background app
 	[[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryAmbient error:nil];
 
-#ifdef MODULE_GAME_ANALYTICS_ENABLED
-	printf("********************* didFinishLaunchingWithOptions\n");
-	if (!ProjectSettings::get_singleton()->has("mobileapptracker/advertiser_id")) {
-		return;
-	}
-	if (!ProjectSettings::get_singleton()->has("mobileapptracker/conversion_key")) {
-		return;
-	}
-
-	String adid = GLOBAL_DEF("mobileapptracker/advertiser_id", "");
-	String convkey = GLOBAL_DEF("mobileapptracker/conversion_key", "");
-
-	NSString *advertiser_id =
-			[NSString stringWithUTF8String:adid.utf8().get_data()];
-	NSString *conversion_key =
-			[NSString stringWithUTF8String:convkey.utf8().get_data()];
-
-	// Account Configuration info - must be set
-	[MobileAppTracker initializeWithMATAdvertiserId:advertiser_id
-								   MATConversionKey:conversion_key];
-
-	// Used to pass us the IFA, enables highly accurate 1-to-1 attribution.
-	// Required for many advertising networks.
-	[MobileAppTracker
-			setAppleAdvertisingIdentifier:[[ASIdentifierManager sharedManager]
-												  advertisingIdentifier]
-			   advertisingTrackingEnabled:[[ASIdentifierManager sharedManager]
-												  isAdvertisingTrackingEnabled]];
-
-#endif
 	return TRUE;
 };
 
 - (void)applicationWillTerminate:(UIApplication *)application {
-
-	printf("********************* will terminate\n");
-
 	[self deinitGameControllers];
 
 	if (motionInitialised) {
@@ -692,7 +632,6 @@ static int frame_count = 0;
 };
 
 - (void)applicationDidEnterBackground:(UIApplication *)application {
-	printf("********************* did enter background\n");
 	///@TODO maybe add pause motionManager? and where would we unpause it?
 
 	if (OS::get_singleton()->get_main_loop())
@@ -706,24 +645,17 @@ static int frame_count = 0;
 }
 
 - (void)applicationWillEnterForeground:(UIApplication *)application {
-	printf("********************* did enter foreground\n");
 	// OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_WM_FOCUS_IN);
 	[view_controller.view startAnimation];
 }
 
 - (void)applicationWillResignActive:(UIApplication *)application {
-	printf("********************* will resign active\n");
 	// OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_WM_FOCUS_OUT);
 	[view_controller.view
 					stopAnimation]; // FIXME: pause seems to be recommended elsewhere
 }
 
 - (void)applicationDidBecomeActive:(UIApplication *)application {
-	printf("********************* did become active\n");
-#ifdef MODULE_GAME_ANALYTICS_ENABLED
-	printf("********************* mobile app tracker found\n");
-	[MobileAppTracker measureSession];
-#endif
 	if (OS::get_singleton()->get_main_loop())
 		OS::get_singleton()->get_main_loop()->notification(
 				MainLoop::NOTIFICATION_WM_FOCUS_IN);
@@ -739,65 +671,6 @@ static int frame_count = 0;
 		AudioDriverCoreAudio::get_singleton()->start();
 }
 
-- (BOOL)application:(UIApplication *)application handleOpenURL:(NSURL *)url {
-#ifdef MODULE_FACEBOOKSCORER_IOS_ENABLED
-	return [[[FacebookScorer sharedInstance] facebook] handleOpenURL:url];
-#else
-	return false;
-#endif
-}
-
-// For 4.2+ support
-- (BOOL)application:(UIApplication *)application
-				  openURL:(NSURL *)url
-		sourceApplication:(NSString *)sourceApplication
-			   annotation:(id)annotation {
-#ifdef MODULE_PARSE_ENABLED
-	NSLog(@"Handling application openURL");
-	return
-			[[FBSDKApplicationDelegate sharedInstance] application:application
-														   openURL:url
-												 sourceApplication:sourceApplication
-														annotation:annotation];
-#endif
-
-#ifdef MODULE_FACEBOOKSCORER_IOS_ENABLED
-	return [[[FacebookScorer sharedInstance] facebook] handleOpenURL:url];
-#else
-	return false;
-#endif
-}
-
-- (void)application:(UIApplication *)application
-		didRegisterForRemoteNotificationsWithDeviceToken:(NSData *)deviceToken {
-#ifdef MODULE_PARSE_ENABLED
-	// Store the deviceToken in the current installation and save it to Parse.
-	PFInstallation *currentInstallation = [PFInstallation currentInstallation];
-	// NSString* token = [[NSString alloc] initWithData:deviceToken
-	// encoding:NSUTF8StringEncoding];
-	NSLog(@"Device Token : %@ ", deviceToken);
-	[currentInstallation setDeviceTokenFromData:deviceToken];
-	[currentInstallation saveInBackground];
-#endif
-}
-
-- (void)application:(UIApplication *)application
-		didReceiveRemoteNotification:(NSDictionary *)userInfo {
-#ifdef MODULE_PARSE_ENABLED
-	[PFPush handlePush:userInfo];
-	NSDictionary *aps =
-			[userInfo objectForKey:UIApplicationLaunchOptionsRemoteNotificationKey];
-	NSUserDefaults *defaults = [NSUserDefaults standardUserDefaults];
-
-	NSLog(@"Push Notification Payload (app active) %@", aps);
-	[defaults setObject:aps forKey:@"notificationInfo"];
-	[defaults synchronize];
-	if (application.applicationState == UIApplicationStateInactive) {
-		[PFAnalytics trackAppOpenedWithRemoteNotificationPayload:userInfo];
-	}
-#endif
-}
-
 - (void)dealloc {
 	[window release];
 	[super dealloc];
diff --git a/platform/iphone/in_app_store.mm b/platform/iphone/in_app_store.mm
index 25f4e1e166..31d3659802 100644
--- a/platform/iphone/in_app_store.mm
+++ b/platform/iphone/in_app_store.mm
@@ -31,10 +31,6 @@
 
 #include "in_app_store.h"
 
-#ifdef MODULE_FUSEBOXX_ENABLED
-#import "modules/fuseboxx/ios/FuseSDK.h"
-#endif
-
 extern "C" {
 #import <Foundation/Foundation.h>
 #import <StoreKit/StoreKit.h>
@@ -224,10 +220,6 @@ Error InAppStore::request_product_info(Variant p_params) {
 					[pending_transactions setObject:transaction forKey:transaction.payment.productIdentifier];
 				}
 
-#ifdef MODULE_FUSEBOXX_ENABLED
-				printf("Registering transaction on Fuseboxx!\n");
-				[FuseSDK registerInAppPurchase:transaction];
-#endif
 			}; break;
 			case SKPaymentTransactionStateFailed: {
 				printf("status transaction failed!\n");
diff --git a/platform/windows/SCsub b/platform/windows/SCsub
index 5a253d5db5..135ccd902a 100644
--- a/platform/windows/SCsub
+++ b/platform/windows/SCsub
@@ -4,9 +4,14 @@ import os
 Import('env')
 
 def make_debug_mingw(target, source, env):
-    os.system('objcopy --only-keep-debug %s %s.debug' % (target[0], target[0]))
-    os.system('strip --strip-debug --strip-unneeded %s' % (target[0]))
-    os.system('objcopy --add-gnu-debuglink=%s.debug %s' % (target[0], target[0]))
+    mingw_prefix = ""
+    if (env["bits"] == "32"):
+        mingw_prefix = env["mingw_prefix_32"]
+    else:
+        mingw_prefix = env["mingw_prefix_64"]
+    os.system(mingw_prefix + 'objcopy --only-keep-debug %s %s.debug' % (target[0], target[0]))
+    os.system(mingw_prefix + 'strip --strip-debug --strip-unneeded %s' % (target[0]))
+    os.system(mingw_prefix + 'objcopy --add-gnu-debuglink=%s.debug %s' % (target[0], target[0]))
 
 common_win = [
     "context_gl_win.cpp",
diff --git a/platform/x11/os_x11.cpp b/platform/x11/os_x11.cpp
index 263ff012d4..b59fab7088 100644
--- a/platform/x11/os_x11.cpp
+++ b/platform/x11/os_x11.cpp
@@ -195,14 +195,13 @@ void OS_X11::initialize(const VideoMode &p_desired, int p_video_driver, int p_au
 				XIDeviceInfo *dev = &info[i];
 				if (!dev->enabled)
 					continue;
-				/*if (dev->use != XIMasterPointer)
-					continue;*/
+				if (!(dev->use == XIMasterPointer || dev->use == XIFloatingSlave))
+					continue;
 
 				bool direct_touch = false;
 				for (int j = 0; j < dev->num_classes; j++) {
 					if (dev->classes[j]->type == XITouchClass && ((XITouchClassInfo *)dev->classes[j])->mode == XIDirectTouch) {
 						direct_touch = true;
-						printf("%d) %d %s\n", i, dev->attachment, dev->name);
 						break;
 					}
 				}
@@ -215,7 +214,7 @@ void OS_X11::initialize(const VideoMode &p_desired, int p_video_driver, int p_au
 			XIFreeDeviceInfo(info);
 
 			if (!touch.devices.size()) {
-				fprintf(stderr, "No suitable touch device found\n");
+				fprintf(stderr, "No touch devices found\n");
 			}
 		}
 	}
@@ -359,7 +358,7 @@ void OS_X11::initialize(const VideoMode &p_desired, int p_video_driver, int p_au
 		// Must be alive after this block
 		static unsigned char mask_data[XIMaskLen(XI_LASTEVENT)] = {};
 
-		touch.event_mask.deviceid = XIAllMasterDevices;
+		touch.event_mask.deviceid = XIAllDevices;
 		touch.event_mask.mask_len = sizeof(mask_data);
 		touch.event_mask.mask = mask_data;
 
@@ -370,12 +369,14 @@ void OS_X11::initialize(const VideoMode &p_desired, int p_video_driver, int p_au
 
 		XISelectEvents(x11_display, x11_window, &touch.event_mask, 1);
 
-		XIClearMask(touch.event_mask.mask, XI_TouchOwnership);
+		// Disabled by now since grabbing also blocks mouse events
+		// (they are received as extended events instead of standard events)
+		/*XIClearMask(touch.event_mask.mask, XI_TouchOwnership);
 
 		// Grab touch devices to avoid OS gesture interference
 		for (int i = 0; i < touch.devices.size(); ++i) {
 			XIGrabDevice(x11_display, touch.devices[i], x11_window, CurrentTime, None, XIGrabModeAsync, XIGrabModeAsync, False, &touch.event_mask);
-		}
+		}*/
 	}
 #endif
 
@@ -512,15 +513,14 @@ void OS_X11::initialize(const VideoMode &p_desired, int p_video_driver, int p_au
 	power_manager = memnew(PowerX11);
 
 	XEvent xevent;
-	while (XCheckIfEvent(x11_display, &xevent, _check_window_events, NULL)) {
-		_window_changed(&xevent);
+	while (XPending(x11_display) > 0) {
+		XNextEvent(x11_display, &xevent);
+		if (xevent.type == ConfigureNotify) {
+			_window_changed(&xevent);
+		}
 	}
 }
 
-int OS_X11::_check_window_events(Display *display, XEvent *event, char *arg) {
-	if (event->type == ConfigureNotify) return 1;
-	return 0;
-}
 void OS_X11::xim_destroy_callback(::XIM im, ::XPointer client_data,
 		::XPointer call_data) {
 
@@ -1513,7 +1513,7 @@ void OS_X11::process_xevents() {
 #ifdef TOUCH_ENABLED
 		if (XGetEventData(x11_display, &event.xcookie)) {
 
-			if (event.xcookie.extension == touch.opcode) {
+			if (event.xcookie.type == GenericEvent && event.xcookie.extension == touch.opcode) {
 
 				XIDeviceEvent *event_data = (XIDeviceEvent *)event.xcookie.data;
 				int index = event_data->detail;
@@ -1522,7 +1522,8 @@ void OS_X11::process_xevents() {
 				switch (event_data->evtype) {
 
 					case XI_TouchBegin: // Fall-through
-						XIAllowTouchEvents(x11_display, event_data->deviceid, event_data->detail, x11_window, XIAcceptTouch);
+							// Disabled hand-in-hand with the grabbing
+							//XIAllowTouchEvents(x11_display, event_data->deviceid, event_data->detail, x11_window, XIAcceptTouch);
 
 					case XI_TouchEnd: {
 
@@ -1568,9 +1569,8 @@ void OS_X11::process_xevents() {
 					} break;
 				}
 			}
-
-			XFreeEventData(x11_display, &event.xcookie);
 		}
+		XFreeEventData(x11_display, &event.xcookie);
 #endif
 
 		switch (event.type) {
@@ -1616,10 +1616,10 @@ void OS_X11::process_xevents() {
 							GrabModeAsync, GrabModeAsync, x11_window, None, CurrentTime);
 				}
 #ifdef TOUCH_ENABLED
-				// Grab touch devices to avoid OS gesture interference
-				for (int i = 0; i < touch.devices.size(); ++i) {
+					// Grab touch devices to avoid OS gesture interference
+					/*for (int i = 0; i < touch.devices.size(); ++i) {
 					XIGrabDevice(x11_display, touch.devices[i], x11_window, CurrentTime, None, XIGrabModeAsync, XIGrabModeAsync, False, &touch.event_mask);
-				}
+				}*/
 #endif
 				if (xic) {
 					XSetICFocus(xic);
@@ -1639,9 +1639,9 @@ void OS_X11::process_xevents() {
 				}
 #ifdef TOUCH_ENABLED
 				// Ungrab touch devices so input works as usual while we are unfocused
-				for (int i = 0; i < touch.devices.size(); ++i) {
+				/*for (int i = 0; i < touch.devices.size(); ++i) {
 					XIUngrabDevice(x11_display, touch.devices[i], CurrentTime);
-				}
+				}*/
 
 				// Release every pointer to avoid sticky points
 				for (Map<int, Vector2>::Element *E = touch.state.front(); E; E = E->next()) {
diff --git a/platform/x11/os_x11.h b/platform/x11/os_x11.h
index 84dff2e089..244c69ee2b 100644
--- a/platform/x11/os_x11.h
+++ b/platform/x11/os_x11.h
@@ -199,7 +199,6 @@ protected:
 	virtual void set_main_loop(MainLoop *p_main_loop);
 
 	void _window_changed(XEvent *xevent);
-	static int _check_window_events(Display *display, XEvent *xevent, char *arg);
 
 public:
 	virtual String get_name();
diff --git a/scene/2d/tile_map.cpp b/scene/2d/tile_map.cpp
index 5a32a3d0f0..1e34372d1e 100644
--- a/scene/2d/tile_map.cpp
+++ b/scene/2d/tile_map.cpp
@@ -28,6 +28,7 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 #include "tile_map.h"
+
 #include "io/marshalls.h"
 #include "method_bind_ext.gen.inc"
 #include "os/os.h"
@@ -169,10 +170,12 @@ void TileMap::set_cell_size(Size2 p_size) {
 	_recreate_quadrants();
 	emit_signal("settings_changed");
 }
+
 Size2 TileMap::get_cell_size() const {
 
 	return cell_size;
 }
+
 void TileMap::set_quadrant_size(int p_size) {
 
 	ERR_FAIL_COND(p_size < 1);
@@ -182,32 +185,12 @@ void TileMap::set_quadrant_size(int p_size) {
 	_recreate_quadrants();
 	emit_signal("settings_changed");
 }
+
 int TileMap::get_quadrant_size() const {
 
 	return quadrant_size;
 }
 
-void TileMap::set_center_x(bool p_enable) {
-
-	center_x = p_enable;
-	_recreate_quadrants();
-	emit_signal("settings_changed");
-}
-bool TileMap::get_center_x() const {
-
-	return center_x;
-}
-void TileMap::set_center_y(bool p_enable) {
-
-	center_y = p_enable;
-	_recreate_quadrants();
-	emit_signal("settings_changed");
-}
-bool TileMap::get_center_y() const {
-
-	return center_y;
-}
-
 void TileMap::_fix_cell_transform(Transform2D &xform, const Cell &p_cell, const Vector2 &p_offset, const Size2 &p_sc) {
 
 	Size2 s = p_sc;
@@ -1473,12 +1456,6 @@ void TileMap::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_tile_origin", "origin"), &TileMap::set_tile_origin);
 	ClassDB::bind_method(D_METHOD("get_tile_origin"), &TileMap::get_tile_origin);
 
-	ClassDB::bind_method(D_METHOD("set_center_x", "enable"), &TileMap::set_center_x);
-	ClassDB::bind_method(D_METHOD("get_center_x"), &TileMap::get_center_x);
-
-	ClassDB::bind_method(D_METHOD("set_center_y", "enable"), &TileMap::set_center_y);
-	ClassDB::bind_method(D_METHOD("get_center_y"), &TileMap::get_center_y);
-
 	ClassDB::bind_method(D_METHOD("set_clip_uv", "enable"), &TileMap::set_clip_uv);
 	ClassDB::bind_method(D_METHOD("get_clip_uv"), &TileMap::get_clip_uv);
 
@@ -1580,8 +1557,6 @@ TileMap::TileMap() {
 	quadrant_order_dirty = false;
 	quadrant_size = 16;
 	cell_size = Size2(64, 64);
-	center_x = false;
-	center_y = false;
 	collision_layer = 1;
 	collision_mask = 1;
 	friction = 1;
diff --git a/scene/2d/tile_map.h b/scene/2d/tile_map.h
index 11d9915cb6..e5608884c4 100644
--- a/scene/2d/tile_map.h
+++ b/scene/2d/tile_map.h
@@ -68,7 +68,6 @@ private:
 	Ref<TileSet> tile_set;
 	Size2i cell_size;
 	int quadrant_size;
-	bool center_x, center_y;
 	Mode mode;
 	Transform2D custom_transform;
 	HalfOffset half_offset;
@@ -231,11 +230,6 @@ public:
 	void set_quadrant_size(int p_size);
 	int get_quadrant_size() const;
 
-	void set_center_x(bool p_enable);
-	bool get_center_x() const;
-	void set_center_y(bool p_enable);
-	bool get_center_y() const;
-
 	void set_cell(int p_x, int p_y, int p_tile, bool p_flip_x = false, bool p_flip_y = false, bool p_transpose = false, Vector2 p_autotile_coord = Vector2());
 	int get_cell(int p_x, int p_y) const;
 	bool is_cell_x_flipped(int p_x, int p_y) const;
diff --git a/scene/3d/gi_probe.cpp b/scene/3d/gi_probe.cpp
index ff5cb41135..1f2b43165e 100644
--- a/scene/3d/gi_probe.cpp
+++ b/scene/3d/gi_probe.cpp
@@ -1286,7 +1286,7 @@ void GIProbe::bake(Node *p_from_node, bool p_create_visual_debug) {
 
 		Ref<GIProbeData> probe_data = get_probe_data();
 
-		if(probe_data.is_null())
+		if (probe_data.is_null())
 			probe_data.instance();
 
 		probe_data->set_bounds(AABB(-extents, extents * 2.0));
diff --git a/scene/3d/particles.cpp b/scene/3d/particles.cpp
index c137b7e8ff..821f1a5a78 100644
--- a/scene/3d/particles.cpp
+++ b/scene/3d/particles.cpp
@@ -1189,7 +1189,7 @@ void ParticlesMaterial::set_flag(Flags p_flag, bool p_enable) {
 	ERR_FAIL_INDEX(p_flag, FLAG_MAX);
 	flags[p_flag] = p_enable;
 	_queue_shader_change();
-	if (p_flag==FLAG_DISABLE_Z) {
+	if (p_flag == FLAG_DISABLE_Z) {
 		_change_notify();
 	}
 }
@@ -1379,7 +1379,7 @@ void ParticlesMaterial::_validate_property(PropertyInfo &property) const {
 	}
 
 	if (property.name.begins_with("orbit_") && !flags[FLAG_DISABLE_Z]) {
-		property.usage=0;
+		property.usage = 0;
 	}
 }
 
diff --git a/scene/3d/physics_body.cpp b/scene/3d/physics_body.cpp
index 8c9f59e267..c5f817d317 100644
--- a/scene/3d/physics_body.cpp
+++ b/scene/3d/physics_body.cpp
@@ -734,31 +734,12 @@ bool RigidBody::is_contact_monitor_enabled() const {
 	return contact_monitor != NULL;
 }
 
-void RigidBody::set_axis_lock_x(bool p_lock) {
-	RigidBody::locked_axis[0] = p_lock;
-	PhysicsServer::get_singleton()->body_set_axis_lock(get_rid(), 0, locked_axis[0]);
+void RigidBody::set_axis_lock(PhysicsServer::BodyAxis p_axis, bool p_lock) {
+	PhysicsServer::get_singleton()->body_set_axis_lock(get_rid(), p_axis, p_lock);
 }
 
-void RigidBody::set_axis_lock_y(bool p_lock) {
-	RigidBody::locked_axis[1] = p_lock;
-	PhysicsServer::get_singleton()->body_set_axis_lock(get_rid(), 1, locked_axis[1]);
-}
-
-void RigidBody::set_axis_lock_z(bool p_lock) {
-	RigidBody::locked_axis[2] = p_lock;
-	PhysicsServer::get_singleton()->body_set_axis_lock(get_rid(), 2, locked_axis[2]);
-}
-
-bool RigidBody::get_axis_lock_x() const {
-	return RigidBody::locked_axis[0];
-}
-
-bool RigidBody::get_axis_lock_y() const {
-	return RigidBody::locked_axis[1];
-}
-
-bool RigidBody::get_axis_lock_z() const {
-	return RigidBody::locked_axis[2];
+bool RigidBody::get_axis_lock(PhysicsServer::BodyAxis p_axis) const {
+	return PhysicsServer::get_singleton()->body_is_axis_locked(get_rid(), p_axis);
 }
 
 Array RigidBody::get_colliding_bodies() const {
@@ -853,12 +834,8 @@ void RigidBody::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("_body_enter_tree"), &RigidBody::_body_enter_tree);
 	ClassDB::bind_method(D_METHOD("_body_exit_tree"), &RigidBody::_body_exit_tree);
 
-	ClassDB::bind_method(D_METHOD("set_axis_lock_x", "axis_lock_x"), &RigidBody::set_axis_lock_x);
-	ClassDB::bind_method(D_METHOD("set_axis_lock_y", "axis_lock_y"), &RigidBody::set_axis_lock_y);
-	ClassDB::bind_method(D_METHOD("set_axis_lock_z", "axis_lock_z"), &RigidBody::set_axis_lock_z);
-	ClassDB::bind_method(D_METHOD("get_axis_lock_x"), &RigidBody::get_axis_lock_x);
-	ClassDB::bind_method(D_METHOD("get_axis_lock_y"), &RigidBody::get_axis_lock_y);
-	ClassDB::bind_method(D_METHOD("get_axis_lock_z"), &RigidBody::get_axis_lock_z);
+	ClassDB::bind_method(D_METHOD("set_axis_lock", "axis", "lock"), &RigidBody::set_axis_lock);
+	ClassDB::bind_method(D_METHOD("get_axis_lock", "axis"), &RigidBody::get_axis_lock);
 
 	ClassDB::bind_method(D_METHOD("get_colliding_bodies"), &RigidBody::get_colliding_bodies);
 
@@ -877,9 +854,12 @@ void RigidBody::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "sleeping"), "set_sleeping", "is_sleeping");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "can_sleep"), "set_can_sleep", "is_able_to_sleep");
 	ADD_GROUP("Axis Lock", "axis_lock_");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "axis_lock_x"), "set_axis_lock_x", "get_axis_lock_x");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "axis_lock_y"), "set_axis_lock_y", "get_axis_lock_y");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "axis_lock_z"), "set_axis_lock_z", "get_axis_lock_z");
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_linear_x"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_LINEAR_X);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_linear_y"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_LINEAR_Y);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_linear_z"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_LINEAR_Z);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_angular_x"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_ANGULAR_X);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_angular_y"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_ANGULAR_Y);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_angular_z"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_ANGULAR_Z);
 	ADD_GROUP("Linear", "linear_");
 	ADD_PROPERTY(PropertyInfo(Variant::VECTOR3, "linear_velocity"), "set_linear_velocity", "get_linear_velocity");
 	ADD_PROPERTY(PropertyInfo(Variant::REAL, "linear_damp", PROPERTY_HINT_RANGE, "-1,128,0.01"), "set_linear_damp", "get_linear_damp");
@@ -969,7 +949,7 @@ bool KinematicBody::move_and_collide(const Vector3 &p_motion, Collision &r_colli
 	}
 
 	for (int i = 0; i < 3; i++) {
-		if (locked_axis[i]) {
+		if (locked_axis & (1 << i)) {
 			result.motion[i] = 0;
 		}
 	}
@@ -985,7 +965,7 @@ Vector3 KinematicBody::move_and_slide(const Vector3 &p_linear_velocity, const Ve
 	Vector3 lv = p_linear_velocity;
 
 	for (int i = 0; i < 3; i++) {
-		if (locked_axis[i]) {
+		if (locked_axis & (1 << i)) {
 			lv[i] = 0;
 		}
 	}
@@ -1038,7 +1018,7 @@ Vector3 KinematicBody::move_and_slide(const Vector3 &p_linear_velocity, const Ve
 			lv = lv.slide(n);
 
 			for (int i = 0; i < 3; i++) {
-				if (locked_axis[i]) {
+				if (locked_axis & (1 << i)) {
 					lv[i] = 0;
 				}
 			}
@@ -1082,31 +1062,20 @@ bool KinematicBody::test_move(const Transform &p_from, const Vector3 &p_motion)
 	return PhysicsServer::get_singleton()->body_test_motion(get_rid(), p_from, p_motion);
 }
 
-void KinematicBody::set_axis_lock_x(bool p_lock) {
-	KinematicBody::locked_axis[0] = p_lock;
-	PhysicsServer::get_singleton()->body_set_axis_lock(get_rid(), 0, locked_axis[0]);
+// Locks or unlocks p_axis. The flag is mirrored into locked_axis, which
+// move_and_collide() and move_and_slide() test, then forwarded to the server.
+// NOTE(review): assumes BodyAxis values are bit flags with linear X/Y/Z in bits 0-2 - confirm.
+void KinematicBody::set_axis_lock(PhysicsServer::BodyAxis p_axis, bool p_lock) {
+	if (p_lock) {
+		locked_axis |= p_axis;
+	} else {
+		locked_axis &= ~p_axis;
+	}
+	PhysicsServer::get_singleton()->body_set_axis_lock(get_rid(), p_axis, p_lock);
 }
 
-void KinematicBody::set_axis_lock_y(bool p_lock) {
-	KinematicBody::locked_axis[1] = p_lock;
-	PhysicsServer::get_singleton()->body_set_axis_lock(get_rid(), 1, locked_axis[1]);
-}
-
-void KinematicBody::set_axis_lock_z(bool p_lock) {
-	KinematicBody::locked_axis[2] = p_lock;
-	PhysicsServer::get_singleton()->body_set_axis_lock(get_rid(), 2, locked_axis[2]);
-}
-
-bool KinematicBody::get_axis_lock_x() const {
-	return KinematicBody::locked_axis[0];
-}
-
-bool KinematicBody::get_axis_lock_y() const {
-	return KinematicBody::locked_axis[1];
-}
-
-bool KinematicBody::get_axis_lock_z() const {
-	return KinematicBody::locked_axis[2];
+bool KinematicBody::get_axis_lock(PhysicsServer::BodyAxis p_axis) const {
+	return PhysicsServer::get_singleton()->body_is_axis_locked(get_rid(), p_axis);
 }
 
 void KinematicBody::set_safe_margin(float p_margin) {
@@ -1157,12 +1118,8 @@ void KinematicBody::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("is_on_wall"), &KinematicBody::is_on_wall);
 	ClassDB::bind_method(D_METHOD("get_floor_velocity"), &KinematicBody::get_floor_velocity);
 
-	ClassDB::bind_method(D_METHOD("set_axis_lock_x", "axis_lock_x"), &KinematicBody::set_axis_lock_x);
-	ClassDB::bind_method(D_METHOD("set_axis_lock_y", "axis_lock_y"), &KinematicBody::set_axis_lock_y);
-	ClassDB::bind_method(D_METHOD("set_axis_lock_z", "axis_lock_z"), &KinematicBody::set_axis_lock_z);
-	ClassDB::bind_method(D_METHOD("get_axis_lock_x"), &KinematicBody::get_axis_lock_x);
-	ClassDB::bind_method(D_METHOD("get_axis_lock_y"), &KinematicBody::get_axis_lock_y);
-	ClassDB::bind_method(D_METHOD("get_axis_lock_z"), &KinematicBody::get_axis_lock_z);
+	ClassDB::bind_method(D_METHOD("set_axis_lock", "axis", "lock"), &KinematicBody::set_axis_lock);
+	ClassDB::bind_method(D_METHOD("get_axis_lock", "axis"), &KinematicBody::get_axis_lock);
 
 	ClassDB::bind_method(D_METHOD("set_safe_margin", "pixels"), &KinematicBody::set_safe_margin);
 	ClassDB::bind_method(D_METHOD("get_safe_margin"), &KinematicBody::get_safe_margin);
@@ -1171,9 +1128,12 @@ void KinematicBody::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_slide_collision", "slide_idx"), &KinematicBody::_get_slide_collision);
 
 	ADD_GROUP("Axis Lock", "axis_lock_");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "axis_lock_x"), "set_axis_lock_x", "get_axis_lock_x");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "axis_lock_y"), "set_axis_lock_y", "get_axis_lock_y");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "axis_lock_z"), "set_axis_lock_z", "get_axis_lock_z");
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_linear_x"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_LINEAR_X);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_linear_y"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_LINEAR_Y);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_linear_z"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_LINEAR_Z);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_angular_x"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_ANGULAR_X);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_angular_y"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_ANGULAR_Y);
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "axis_lock_angular_z"), "set_axis_lock", "get_axis_lock", PhysicsServer::BODY_AXIS_ANGULAR_Z);
 
 	ADD_PROPERTY(PropertyInfo(Variant::REAL, "collision/safe_margin", PROPERTY_HINT_RANGE, "0.001,256,0.001"), "set_safe_margin", "get_safe_margin");
 }
@@ -1182,7 +1142,7 @@ KinematicBody::KinematicBody() :
 		PhysicsBody(PhysicsServer::BODY_MODE_KINEMATIC) {
 
 	margin = 0.001;
-
+	locked_axis = 0;
 	on_floor = false;
 	on_ceiling = false;
 	on_wall = false;
diff --git a/scene/3d/physics_body.h b/scene/3d/physics_body.h
index 57b120ef63..9d9feda0b2 100644
--- a/scene/3d/physics_body.h
+++ b/scene/3d/physics_body.h
@@ -132,8 +132,6 @@ private:
 	bool sleeping;
 	bool ccd;
 
-	bool locked_axis[3] = { false, false, false };
-
 	int max_contacts_reported;
 
 	bool custom_integrator;
@@ -238,12 +236,8 @@ public:
 	void set_use_continuous_collision_detection(bool p_enable);
 	bool is_using_continuous_collision_detection() const;
 
-	void set_axis_lock_x(bool p_lock);
-	void set_axis_lock_y(bool p_lock);
-	void set_axis_lock_z(bool p_lock);
-	bool get_axis_lock_x() const;
-	bool get_axis_lock_y() const;
-	bool get_axis_lock_z() const;
+	void set_axis_lock(PhysicsServer::BodyAxis p_axis, bool p_lock);
+	bool get_axis_lock(PhysicsServer::BodyAxis p_axis) const;
 
 	Array get_colliding_bodies() const;
 
@@ -277,7 +271,7 @@ public:
 	};
 
 private:
-	bool locked_axis[3] = { false, false, false };
+	uint16_t locked_axis;
 
 	float margin;
 
@@ -301,12 +295,8 @@ public:
 	bool move_and_collide(const Vector3 &p_motion, Collision &r_collision);
 	bool test_move(const Transform &p_from, const Vector3 &p_motion);
 
-	void set_axis_lock_x(bool p_lock);
-	void set_axis_lock_y(bool p_lock);
-	void set_axis_lock_z(bool p_lock);
-	bool get_axis_lock_x() const;
-	bool get_axis_lock_y() const;
-	bool get_axis_lock_z() const;
+	void set_axis_lock(PhysicsServer::BodyAxis p_axis, bool p_lock);
+	bool get_axis_lock(PhysicsServer::BodyAxis p_axis) const;
 
 	void set_safe_margin(float p_margin);
 	float get_safe_margin() const;
diff --git a/scene/animation/animation_player.cpp b/scene/animation/animation_player.cpp
index 91aa069060..e866e665d8 100644
--- a/scene/animation/animation_player.cpp
+++ b/scene/animation/animation_player.cpp
@@ -1327,7 +1327,7 @@ void AnimationPlayer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_root"), &AnimationPlayer::get_root);
 
 	ClassDB::bind_method(D_METHOD("seek", "seconds", "update"), &AnimationPlayer::seek, DEFVAL(false));
-	ClassDB::bind_method(D_METHOD("get_position"), &AnimationPlayer::get_current_animation_position);
+	ClassDB::bind_method(D_METHOD("advance", "delta"), &AnimationPlayer::advance);
 
 	ClassDB::bind_method(D_METHOD("find_animation", "animation"), &AnimationPlayer::find_animation);
 
@@ -1339,8 +1339,6 @@ void AnimationPlayer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_current_animation_position"), &AnimationPlayer::get_current_animation_position);
 	ClassDB::bind_method(D_METHOD("get_current_animation_length"), &AnimationPlayer::get_current_animation_length);
 
-	ClassDB::bind_method(D_METHOD("advance", "delta"), &AnimationPlayer::advance);
-
 	ADD_GROUP("Playback Options", "playback_");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "playback_process_mode", PROPERTY_HINT_ENUM, "Physics,Idle"), "set_animation_process_mode", "get_animation_process_mode");
 	ADD_PROPERTY(PropertyInfo(Variant::REAL, "playback_default_blend_time", PROPERTY_HINT_RANGE, "0,4096,0.01"), "set_default_blend_time", "get_default_blend_time");
diff --git a/scene/gui/button.cpp b/scene/gui/button.cpp
index 1fa03f81f4..47977f0283 100644
--- a/scene/gui/button.cpp
+++ b/scene/gui/button.cpp
@@ -55,6 +55,10 @@ Size2 Button::get_minimum_size() const {
 	return get_stylebox("normal")->get_minimum_size() + minsize;
 }
 
+void Button::_set_internal_margin(Margin p_margin, float p_value) {
+	_internal_margin[p_margin] = p_value;
+}
+
 void Button::_notification(int p_what) {
 
 	if (p_what == NOTIFICATION_TRANSLATION_CHANGED) {
@@ -136,11 +140,16 @@ void Button::_notification(int p_what) {
 
 		Point2 icon_ofs = (!_icon.is_null()) ? Point2(_icon->get_width() + get_constant("hseparation"), 0) : Point2();
 		int text_clip = size.width - style->get_minimum_size().width - icon_ofs.width;
-		Point2 text_ofs = (size - style->get_minimum_size() - icon_ofs - font->get_string_size(xl_text)) / 2.0;
+		Point2 text_ofs = (size - style->get_minimum_size() - icon_ofs - font->get_string_size(xl_text) - Point2(_internal_margin[MARGIN_RIGHT] - _internal_margin[MARGIN_LEFT], 0)) / 2.0;
 
 		switch (align) {
 			case ALIGN_LEFT: {
-				text_ofs.x = style->get_margin(MARGIN_LEFT) + icon_ofs.x;
+				// Only add the internal margin and its separation when a left margin is set, as ALIGN_RIGHT does.
+				if (_internal_margin[MARGIN_LEFT] > 0) {
+					text_ofs.x = style->get_margin(MARGIN_LEFT) + icon_ofs.x + _internal_margin[MARGIN_LEFT] + get_constant("hseparation");
+				} else {
+					text_ofs.x = style->get_margin(MARGIN_LEFT) + icon_ofs.x;
+				}
 				text_ofs.y += style->get_offset().y;
 			} break;
 			case ALIGN_CENTER: {
@@ -150,7 +154,11 @@ void Button::_notification(int p_what) {
 				text_ofs += style->get_offset();
 			} break;
 			case ALIGN_RIGHT: {
-				text_ofs.x = size.x - style->get_margin(MARGIN_RIGHT) - font->get_string_size(xl_text).x;
+				if (_internal_margin[MARGIN_RIGHT] > 0) {
+					text_ofs.x = size.x - style->get_margin(MARGIN_RIGHT) - font->get_string_size(xl_text).x - _internal_margin[MARGIN_RIGHT] - get_constant("hseparation");
+				} else {
+					text_ofs.x = size.x - style->get_margin(MARGIN_RIGHT) - font->get_string_size(xl_text).x;
+				}
 				text_ofs.y += style->get_offset().y;
 			} break;
 		}
@@ -162,7 +170,11 @@ void Button::_notification(int p_what) {
 			int valign = size.height - style->get_minimum_size().y;
 			if (is_disabled())
 				color_icon.a = 0.4;
-			_icon->draw(ci, style->get_offset() + Point2(0, Math::floor((valign - _icon->get_height()) / 2.0)), color_icon);
+			if (_internal_margin[MARGIN_LEFT] > 0) {
+				_icon->draw(ci, style->get_offset() + Point2(_internal_margin[MARGIN_LEFT] + get_constant("hseparation"), Math::floor((valign - _icon->get_height()) / 2.0)), color_icon);
+			} else {
+				_icon->draw(ci, style->get_offset() + Point2(0, Math::floor((valign - _icon->get_height()) / 2.0)), color_icon);
+			}
 		}
 	}
 }
@@ -253,7 +265,7 @@ void Button::_bind_methods() {
 	ADD_PROPERTYNZ(PropertyInfo(Variant::OBJECT, "icon", PROPERTY_HINT_RESOURCE_TYPE, "Texture"), "set_button_icon", "get_button_icon");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "flat"), "set_flat", "is_flat");
 	ADD_PROPERTYNZ(PropertyInfo(Variant::BOOL, "clip_text"), "set_clip_text", "get_clip_text");
-	ADD_PROPERTYNO(PropertyInfo(Variant::INT, "align", PROPERTY_HINT_ENUM, "Left,Center,Right"), "set_text_align", "get_text_align");
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "align", PROPERTY_HINT_ENUM, "Left,Center,Right"), "set_text_align", "get_text_align");
 }
 
 Button::Button(const String &p_text) {
@@ -263,6 +275,10 @@ Button::Button(const String &p_text) {
 	set_mouse_filter(MOUSE_FILTER_STOP);
 	set_text(p_text);
 	align = ALIGN_CENTER;
+
+	for (int i = 0; i < 4; i++) {
+		_internal_margin[i] = 0;
+	}
 }
 
 Button::~Button() {
diff --git a/scene/gui/button.h b/scene/gui/button.h
index dd6e730b86..35488582de 100644
--- a/scene/gui/button.h
+++ b/scene/gui/button.h
@@ -53,9 +53,11 @@ private:
 	Ref<Texture> icon;
 	bool clip_text;
 	TextAlign align;
+	float _internal_margin[4];
 
 protected:
 	virtual Size2 get_minimum_size() const;
+	void _set_internal_margin(Margin p_margin, float p_value);
 	void _notification(int p_what);
 	static void _bind_methods();
 
diff --git a/scene/gui/check_box.cpp b/scene/gui/check_box.cpp
index 00f6153062..bf8156b92b 100644
--- a/scene/gui/check_box.cpp
+++ b/scene/gui/check_box.cpp
@@ -31,18 +31,54 @@
 
 #include "servers/visual_server.h"
 
+Size2 CheckBox::get_icon_size() const {
+	Ref<Texture> checked = Control::get_icon("checked");
+	Ref<Texture> unchecked = Control::get_icon("unchecked");
+	Ref<Texture> radio_checked = Control::get_icon("radio_checked");
+	Ref<Texture> radio_unchecked = Control::get_icon("radio_unchecked");
+
+	Size2 tex_size = Size2(0, 0);
+	if (!checked.is_null())
+		tex_size = Size2(checked->get_width(), checked->get_height());
+	if (!unchecked.is_null())
+		tex_size = Size2(MAX(tex_size.width, unchecked->get_width()), MAX(tex_size.height, unchecked->get_height()));
+	if (!radio_checked.is_null())
+		tex_size = Size2(MAX(tex_size.width, radio_checked->get_width()), MAX(tex_size.height, radio_checked->get_height()));
+	if (!radio_unchecked.is_null())
+		tex_size = Size2(MAX(tex_size.width, radio_unchecked->get_width()), MAX(tex_size.height, radio_unchecked->get_height()));
+	return tex_size;
+}
+
+Size2 CheckBox::get_minimum_size() const {
+
+	Size2 minsize = Button::get_minimum_size();
+	Size2 tex_size = get_icon_size();
+	minsize.width += tex_size.width;
+	if (get_text().length() > 0) {
+		minsize.width += get_constant("hseparation");
+	}
+	Ref<StyleBox> sb = get_stylebox("normal");
+	minsize.height = MAX(minsize.height, tex_size.height + sb->get_margin(MARGIN_TOP) + sb->get_margin(MARGIN_BOTTOM));
+
+	return minsize;
+}
+
 void CheckBox::_notification(int p_what) {
 
-	if (p_what == NOTIFICATION_DRAW) {
+	if (p_what == NOTIFICATION_THEME_CHANGED) {
+
+		_set_internal_margin(MARGIN_LEFT, get_icon_size().width);
+	} else if (p_what == NOTIFICATION_DRAW) {
 
 		RID ci = get_canvas_item();
 
 		Ref<Texture> on = Control::get_icon(is_radio() ? "radio_checked" : "checked");
 		Ref<Texture> off = Control::get_icon(is_radio() ? "radio_unchecked" : "unchecked");
+		Ref<StyleBox> sb = get_stylebox("normal");
 
 		Vector2 ofs;
-		ofs.x = 0;
-		ofs.y = int((get_size().height - on->get_height()) / 2);
+		ofs.x = sb->get_margin(MARGIN_LEFT);
+		ofs.y = int((get_size().height - get_icon_size().height) / 2);
 
 		if (is_pressed())
 			on->draw(ci, ofs);
@@ -60,6 +96,7 @@ CheckBox::CheckBox(const String &p_text) :
 		Button(p_text) {
 	set_toggle_mode(true);
 	set_text_align(ALIGN_LEFT);
+	_set_internal_margin(MARGIN_LEFT, get_icon_size().width);
 }
 
 CheckBox::~CheckBox() {
diff --git a/scene/gui/check_box.h b/scene/gui/check_box.h
index 4da06be8d1..3d3f170e8c 100644
--- a/scene/gui/check_box.h
+++ b/scene/gui/check_box.h
@@ -39,6 +39,8 @@ class CheckBox : public Button {
 	GDCLASS(CheckBox, Button);
 
 protected:
+	Size2 get_icon_size() const;
+	virtual Size2 get_minimum_size() const;
 	void _notification(int p_what);
 
 	bool is_radio();
diff --git a/scene/gui/check_button.cpp b/scene/gui/check_button.cpp
index 77fdedd5e5..641d2d4f01 100644
--- a/scene/gui/check_button.cpp
+++ b/scene/gui/check_button.cpp
@@ -32,10 +32,7 @@
 #include "print_string.h"
 #include "servers/visual_server.h"
 
-Size2 CheckButton::get_minimum_size() const {
-
-	Size2 minsize = Button::get_minimum_size();
-
+Size2 CheckButton::get_icon_size() const {
 	Ref<Texture> on = Control::get_icon("on");
 	Ref<Texture> off = Control::get_icon("off");
 	Size2 tex_size = Size2(0, 0);
@@ -43,15 +40,29 @@ Size2 CheckButton::get_minimum_size() const {
 		tex_size = Size2(on->get_width(), on->get_height());
 	if (!off.is_null())
 		tex_size = Size2(MAX(tex_size.width, off->get_width()), MAX(tex_size.height, off->get_height()));
-	minsize += Size2(tex_size.width + get_constant("hseparation"), 0);
-	minsize.height = MAX(minsize.height, tex_size.height);
+	return tex_size;
+}
+
+Size2 CheckButton::get_minimum_size() const {
 
-	return get_stylebox("normal")->get_minimum_size() + minsize;
+	Size2 minsize = Button::get_minimum_size();
+	Size2 tex_size = get_icon_size();
+	minsize.width += tex_size.width;
+	if (get_text().length() > 0) {
+		minsize.width += get_constant("hseparation");
+	}
+	Ref<StyleBox> sb = get_stylebox("normal");
+	minsize.height = MAX(minsize.height, tex_size.height + sb->get_margin(MARGIN_TOP) + sb->get_margin(MARGIN_BOTTOM));
+
+	return minsize;
 }
 
 void CheckButton::_notification(int p_what) {
 
-	if (p_what == NOTIFICATION_DRAW) {
+	if (p_what == NOTIFICATION_THEME_CHANGED) {
+
+		_set_internal_margin(MARGIN_RIGHT, get_icon_size().width);
+	} else if (p_what == NOTIFICATION_DRAW) {
 
 		RID ci = get_canvas_item();
 
@@ -59,10 +70,11 @@ void CheckButton::_notification(int p_what) {
 		Ref<Texture> off = Control::get_icon("off");
 
 		Ref<StyleBox> sb = get_stylebox("normal");
-		Size2 sb_ofs = Size2(sb->get_margin(MARGIN_RIGHT), sb->get_margin(MARGIN_TOP));
 		Vector2 ofs;
-		ofs.x = get_minimum_size().width - (on->get_width() + sb_ofs.width);
-		ofs.y = sb_ofs.height;
+		Size2 tex_size = get_icon_size();
+
+		ofs.x = get_size().width - (tex_size.width + sb->get_margin(MARGIN_RIGHT));
+		ofs.y = (get_size().height - tex_size.height) / 2;
 
 		if (is_pressed())
 			on->draw(ci, ofs);
@@ -75,6 +87,8 @@ CheckButton::CheckButton() {
 
 	set_toggle_mode(true);
 	set_text_align(ALIGN_LEFT);
+
+	_set_internal_margin(MARGIN_RIGHT, get_icon_size().width);
 }
 
 CheckButton::~CheckButton() {
diff --git a/scene/gui/check_button.h b/scene/gui/check_button.h
index eb68943fe7..3103a40d3c 100644
--- a/scene/gui/check_button.h
+++ b/scene/gui/check_button.h
@@ -39,6 +39,7 @@ class CheckButton : public Button {
 	GDCLASS(CheckButton, Button);
 
 protected:
+	Size2 get_icon_size() const;
 	virtual Size2 get_minimum_size() const;
 	void _notification(int p_what);
 
diff --git a/scene/gui/control.cpp b/scene/gui/control.cpp
index adca78d1d4..81d2b6731f 100644
--- a/scene/gui/control.cpp
+++ b/scene/gui/control.cpp
@@ -2470,6 +2470,16 @@ Control::MouseFilter Control::get_mouse_filter() const {
 	return data.mouse_filter;
 }
 
+void Control::set_pass_on_modal_close_click(bool p_pass_on) {
+
+	data.pass_on_modal_close_click = p_pass_on;
+}
+
+bool Control::pass_on_modal_close_click() const {
+
+	return data.pass_on_modal_close_click;
+}
+
 Control *Control::get_focus_owner() const {
 
 	ERR_FAIL_COND_V(!is_inside_tree(), NULL);
@@ -2934,6 +2944,7 @@ Control::Control() {
 	data.parent = NULL;
 
 	data.mouse_filter = MOUSE_FILTER_STOP;
+	data.pass_on_modal_close_click = true;
 
 	data.SI = NULL;
 	data.MI = NULL;
diff --git a/scene/gui/control.h b/scene/gui/control.h
index 92d1c969fc..9ac0eb0be3 100644
--- a/scene/gui/control.h
+++ b/scene/gui/control.h
@@ -165,6 +165,8 @@ private:
 		bool pending_min_size_update;
 		Point2 custom_minimum_size;
 
+		bool pass_on_modal_close_click;
+
 		MouseFilter mouse_filter;
 
 		bool clip_contents;
@@ -401,6 +403,9 @@ public:
 	void set_mouse_filter(MouseFilter p_filter);
 	MouseFilter get_mouse_filter() const;
 
+	void set_pass_on_modal_close_click(bool p_pass_on);
+	bool pass_on_modal_close_click() const;
+
 	/* SKINNING */
 
 	void add_icon_override(const StringName &p_name, const Ref<Texture> &p_icon);
diff --git a/scene/gui/menu_button.cpp b/scene/gui/menu_button.cpp
index ac450616d6..d850553957 100644
--- a/scene/gui/menu_button.cpp
+++ b/scene/gui/menu_button.cpp
@@ -111,6 +111,7 @@ MenuButton::MenuButton() {
 	popup->hide();
 	add_child(popup);
 	popup->set_as_toplevel(true);
+	popup->set_pass_on_modal_close_click(false);
 	connect("button_up", popup, "call_deferred", make_binds("grab_click_focus"));
 	set_process_unhandled_key_input(true);
 	set_action_mode(ACTION_MODE_BUTTON_PRESS);
diff --git a/scene/gui/option_button.cpp b/scene/gui/option_button.cpp
index b4d0799945..70f3d9ca83 100644
--- a/scene/gui/option_button.cpp
+++ b/scene/gui/option_button.cpp
@@ -323,6 +323,7 @@ OptionButton::OptionButton() {
 	popup = memnew(PopupMenu);
 	popup->hide();
 	popup->set_as_toplevel(true);
+	popup->set_pass_on_modal_close_click(false);
 	add_child(popup);
 	popup->connect("id_pressed", this, "_selected");
 
diff --git a/scene/gui/popup_menu.cpp b/scene/gui/popup_menu.cpp
index 4ee6f93c9a..32f889e826 100644
--- a/scene/gui/popup_menu.cpp
+++ b/scene/gui/popup_menu.cpp
@@ -624,6 +624,20 @@ void PopupMenu::add_check_shortcut(const Ref<ShortCut> &p_shortcut, int p_ID, bo
 	update();
 }
 
+void PopupMenu::add_multistate_item(const String &p_label, int p_max_states, int p_default_state, int p_ID, uint32_t p_accel) {
+
+	Item item;
+	item.text = p_label;
+	item.xl_text = tr(p_label);
+	item.accel = p_accel;
+	item.ID = p_ID;
+	item.checkable = false;
+	item.max_states = p_max_states;
+	item.state = p_default_state;
+	items.push_back(item);
+	update();
+}
+
 void PopupMenu::set_item_text(int p_idx, const String &p_text) {
 
 	ERR_FAIL_INDEX(p_idx, items.size());
@@ -772,6 +786,11 @@ Ref<ShortCut> PopupMenu::get_item_shortcut(int p_idx) const {
 	return items[p_idx].shortcut;
 }
 
+int PopupMenu::get_item_state(int p_idx) const {
+	ERR_FAIL_INDEX_V(p_idx, items.size(), -1);
+	return items[p_idx].state;
+}
+
 void PopupMenu::set_item_as_separator(int p_idx, bool p_separator) {
 
 	ERR_FAIL_INDEX(p_idx, items.size());
@@ -820,6 +839,27 @@ void PopupMenu::set_item_h_offset(int p_idx, int p_offset) {
 	update();
 }
 
+void PopupMenu::set_item_multistate(int p_idx, int p_state) {
+
+	ERR_FAIL_INDEX(p_idx, items.size());
+	items[p_idx].state = p_state;
+	update();
+}
+
+void PopupMenu::toggle_item_multistate(int p_idx) {
+
+	ERR_FAIL_INDEX(p_idx, items.size());
+	if (0 >= items[p_idx].max_states) {
+		return;
+	}
+
+	++items[p_idx].state;
+	if (items[p_idx].max_states <= items[p_idx].state)
+		items[p_idx].state = 0;
+
+	update();
+}
+
 bool PopupMenu::is_item_checkable(int p_idx) const {
 	ERR_FAIL_INDEX_V(p_idx, items.size(), false);
 	return items[p_idx].checkable;
@@ -895,21 +935,34 @@ void PopupMenu::activate_item(int p_item) {
 	while (pop) {
 		// We close all parents that are chained together,
 		// with hide_on_item_selection enabled
-		if ((items[p_item].checkable && hide_on_checkable_item_selection && pop->is_hide_on_checkable_item_selection()) || (!items[p_item].checkable && hide_on_item_selection && pop->is_hide_on_item_selection())) {
-			pop->hide();
-			next = next->get_parent();
-			pop = Object::cast_to<PopupMenu>(next);
-		} else {
-			// Break out of loop when the next parent has
-			// hide_on_item_selection disabled
+
+		if (items[p_item].checkable) {
+			if (!hide_on_checkable_item_selection || !pop->is_hide_on_checkable_item_selection())
+				break;
+		} else if (0 < items[p_item].max_states) {
+			if (!hide_on_multistate_item_selection || !pop->is_hide_on_multistate_item_selection())
+				break;
+		} else if (!hide_on_item_selection || !pop->is_hide_on_item_selection())
 			break;
-		}
+
+		pop->hide();
+		next = next->get_parent();
+		pop = Object::cast_to<PopupMenu>(next);
 	}
+
 	// Hides popup by default; unless otherwise specified
 	// by using set_hide_on_item_selection and set_hide_on_checkable_item_selection
-	if ((items[p_item].checkable && hide_on_checkable_item_selection) || (!items[p_item].checkable && hide_on_item_selection)) {
-		hide();
-	}
+
+	if (items[p_item].checkable) {
+		if (!hide_on_checkable_item_selection)
+			return;
+	} else if (0 < items[p_item].max_states) {
+		if (!hide_on_multistate_item_selection)
+			return;
+	} else if (!hide_on_item_selection)
+		return;
+
+	hide();
 }
 
 void PopupMenu::remove_item(int p_idx) {
@@ -1025,7 +1078,7 @@ void PopupMenu::set_hide_on_item_selection(bool p_enabled) {
 	hide_on_item_selection = p_enabled;
 }
 
-bool PopupMenu::is_hide_on_item_selection() {
+bool PopupMenu::is_hide_on_item_selection() const {
 
 	return hide_on_item_selection;
 }
@@ -1035,11 +1088,21 @@ void PopupMenu::set_hide_on_checkable_item_selection(bool p_enabled) {
 	hide_on_checkable_item_selection = p_enabled;
 }
 
-bool PopupMenu::is_hide_on_checkable_item_selection() {
+bool PopupMenu::is_hide_on_checkable_item_selection() const {
 
 	return hide_on_checkable_item_selection;
 }
 
+void PopupMenu::set_hide_on_multistate_item_selection(bool p_enabled) {
+
+	hide_on_multistate_item_selection = p_enabled;
+}
+
+bool PopupMenu::is_hide_on_multistate_item_selection() const {
+
+	return hide_on_multistate_item_selection;
+}
+
 String PopupMenu::get_tooltip(const Point2 &p_pos) const {
 
 	int over = _get_mouse_over(p_pos);
@@ -1098,8 +1161,10 @@ void PopupMenu::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_item_as_checkable", "idx", "enable"), &PopupMenu::set_item_as_checkable);
 	ClassDB::bind_method(D_METHOD("set_item_tooltip", "idx", "tooltip"), &PopupMenu::set_item_tooltip);
 	ClassDB::bind_method(D_METHOD("set_item_shortcut", "idx", "shortcut", "global"), &PopupMenu::set_item_shortcut, DEFVAL(false));
+	ClassDB::bind_method(D_METHOD("set_item_multistate", "idx", "state"), &PopupMenu::set_item_multistate);
 
 	ClassDB::bind_method(D_METHOD("toggle_item_checked", "idx"), &PopupMenu::toggle_item_checked);
+	ClassDB::bind_method(D_METHOD("toggle_item_multistate", "idx"), &PopupMenu::toggle_item_multistate);
 
 	ClassDB::bind_method(D_METHOD("get_item_text", "idx"), &PopupMenu::get_item_text);
 	ClassDB::bind_method(D_METHOD("get_item_icon", "idx"), &PopupMenu::get_item_icon);
@@ -1131,6 +1196,9 @@ void PopupMenu::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_hide_on_checkable_item_selection", "enable"), &PopupMenu::set_hide_on_checkable_item_selection);
 	ClassDB::bind_method(D_METHOD("is_hide_on_checkable_item_selection"), &PopupMenu::is_hide_on_checkable_item_selection);
 
+	ClassDB::bind_method(D_METHOD("set_hide_on_state_item_selection", "enable"), &PopupMenu::set_hide_on_multistate_item_selection);
+	ClassDB::bind_method(D_METHOD("is_hide_on_state_item_selection"), &PopupMenu::is_hide_on_multistate_item_selection);
+
 	ClassDB::bind_method(D_METHOD("_submenu_timeout"), &PopupMenu::_submenu_timeout);
 
 	ADD_PROPERTY(PropertyInfo(Variant::ARRAY, "items", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR), "_set_items", "_get_items");
@@ -1154,6 +1222,7 @@ PopupMenu::PopupMenu() {
 	set_as_toplevel(true);
 	set_hide_on_item_selection(true);
 	set_hide_on_checkable_item_selection(true);
+	set_hide_on_multistate_item_selection(false);
 
 	submenu_timer = memnew(Timer);
 	submenu_timer->set_wait_time(0.3);
diff --git a/scene/gui/popup_menu.h b/scene/gui/popup_menu.h
index c9e9c8e311..ee514f4c4b 100644
--- a/scene/gui/popup_menu.h
+++ b/scene/gui/popup_menu.h
@@ -46,6 +46,8 @@ class PopupMenu : public Popup {
 		String xl_text;
 		bool checked;
 		bool checkable;
+		int max_states;
+		int state;
 		bool separator;
 		bool disabled;
 		int ID;
@@ -62,6 +64,8 @@ class PopupMenu : public Popup {
 			checked = false;
 			checkable = false;
 			separator = false;
+			max_states = 0;
+			state = 0;
 			accel = 0;
 			disabled = false;
 			_ofs_cache = 0;
@@ -86,6 +90,7 @@ class PopupMenu : public Popup {
 	bool invalidated_click;
 	bool hide_on_item_selection;
 	bool hide_on_checkable_item_selection;
+	bool hide_on_multistate_item_selection;
 	Vector2 moved;
 
 	Array _get_items() const;
@@ -115,6 +120,8 @@ public:
 	void add_icon_check_shortcut(const Ref<Texture> &p_icon, const Ref<ShortCut> &p_shortcut, int p_ID = -1, bool p_global = false);
 	void add_check_shortcut(const Ref<ShortCut> &p_shortcut, int p_ID = -1, bool p_global = false);
 
+	void add_multistate_item(const String &p_label, int p_max_states, int p_default_state, int p_ID = -1, uint32_t p_accel = 0);
+
 	void set_item_text(int p_idx, const String &p_text);
 	void set_item_icon(int p_idx, const Ref<Texture> &p_icon);
 	void set_item_checked(int p_idx, bool p_checked);
@@ -128,6 +135,8 @@ public:
 	void set_item_tooltip(int p_idx, const String &p_tooltip);
 	void set_item_shortcut(int p_idx, const Ref<ShortCut> &p_shortcut, bool p_global = false);
 	void set_item_h_offset(int p_idx, int p_offset);
+	void set_item_multistate(int p_idx, int p_state);
+	void toggle_item_multistate(int p_idx);
 
 	void toggle_item_checked(int p_idx);
 
@@ -145,6 +154,7 @@ public:
 	bool is_item_checkable(int p_idx) const;
 	String get_item_tooltip(int p_idx) const;
 	Ref<ShortCut> get_item_shortcut(int p_idx) const;
+	int get_item_state(int p_idx) const;
 
 	int get_item_count() const;
 
@@ -168,10 +178,13 @@ public:
 
 	void set_invalidate_click_until_motion();
 	void set_hide_on_item_selection(bool p_enabled);
-	bool is_hide_on_item_selection();
+	bool is_hide_on_item_selection() const;
 
 	void set_hide_on_checkable_item_selection(bool p_enabled);
-	bool is_hide_on_checkable_item_selection();
+	bool is_hide_on_checkable_item_selection() const;
+
+	void set_hide_on_multistate_item_selection(bool p_enabled);
+	bool is_hide_on_multistate_item_selection() const;
 
 	PopupMenu();
 	~PopupMenu();
diff --git a/scene/gui/rich_text_label.cpp b/scene/gui/rich_text_label.cpp
index 9cf4c105b4..45188c3a52 100644
--- a/scene/gui/rich_text_label.cpp
+++ b/scene/gui/rich_text_label.cpp
@@ -1822,7 +1822,7 @@ bool RichTextLabel::search(const String &p_string, bool p_from_selection) {
 
 void RichTextLabel::selection_copy() {
 
-	if (!selection.enabled)
+	if (!selection.active || !selection.enabled)
 		return;
 
 	String text;
diff --git a/scene/gui/rich_text_label.h b/scene/gui/rich_text_label.h
index 1096e3f650..b9a719dd10 100644
--- a/scene/gui/rich_text_label.h
+++ b/scene/gui/rich_text_label.h
@@ -256,8 +256,8 @@ private:
 		Item *to;
 		int to_char;
 
-		bool active;
-		bool enabled;
+		bool active; // anything selected? i.e. from, to, etc. valid?
+		bool enabled; // allow selections?
 	};
 
 	Selection selection;
diff --git a/scene/gui/tab_container.cpp b/scene/gui/tab_container.cpp
index 581034ddee..f2a2d862de 100644
--- a/scene/gui/tab_container.cpp
+++ b/scene/gui/tab_container.cpp
@@ -294,13 +294,17 @@ void TabContainer::_notification(int p_what) {
 			}
 		} break;
 		case NOTIFICATION_THEME_CHANGED: {
-			if (get_tab_count() > 0) {
-				call_deferred("set_current_tab", get_current_tab()); //wait until all changed theme
-			}
+			call_deferred("_on_theme_changed"); //wait until all changed theme
 		} break;
 	}
 }
 
+void TabContainer::_on_theme_changed() { // invoked via call_deferred from NOTIFICATION_THEME_CHANGED
+	if (get_tab_count() > 0) { // nothing to re-select when there are no tabs
+		set_current_tab(get_current_tab()); // re-apply the current tab after the theme change
+	}
+}
+
 int TabContainer::_get_tab_width(int p_index) const {
 
 	ERR_FAIL_INDEX_V(p_index, get_tab_count(), 0);
@@ -658,6 +662,7 @@ void TabContainer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_popup"), &TabContainer::get_popup);
 
 	ClassDB::bind_method(D_METHOD("_child_renamed_callback"), &TabContainer::_child_renamed_callback);
+	ClassDB::bind_method(D_METHOD("_on_theme_changed"), &TabContainer::_on_theme_changed);
 
 	ADD_SIGNAL(MethodInfo("tab_changed", PropertyInfo(Variant::INT, "tab")));
 	ADD_SIGNAL(MethodInfo("tab_selected", PropertyInfo(Variant::INT, "tab")));
diff --git a/scene/gui/tab_container.h b/scene/gui/tab_container.h
index be59a16b3f..a36c4f3790 100644
--- a/scene/gui/tab_container.h
+++ b/scene/gui/tab_container.h
@@ -60,6 +60,7 @@ private:
 
 	Vector<Control *> _get_tabs() const;
 	int _get_tab_width(int p_index) const;
+	void _on_theme_changed();
 
 protected:
 	void _child_renamed_callback();
diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp
index a785abbc65..4635de81e8 100644
--- a/scene/main/viewport.cpp
+++ b/scene/main/viewport.cpp
@@ -1269,12 +1269,9 @@ Transform2D Viewport::_get_input_pre_xform() const {
 
 Vector2 Viewport::_get_window_offset() const {
 
-	/*
-	if (parent_control) {
-		return (parent_control->get_viewport()->get_final_transform() * parent_control->get_global_transform_with_canvas()).get_origin();
+	if (get_parent() && get_parent()->has_method("get_global_position")) {
+		return get_parent()->call("get_global_position");
 	}
-	*/
-
 	return Vector2();
 }
 
@@ -1649,6 +1646,8 @@ void Viewport::_gui_input_event(Ref<InputEvent> p_event) {
 
 			} else {
 
+				bool is_handled = false;
+
 				_gui_sort_modal_stack();
 				while (!gui.modal_stack.empty()) {
 
@@ -1666,11 +1665,20 @@ void Viewport::_gui_input_event(Ref<InputEvent> p_event) {
 						top->notification(Control::NOTIFICATION_MODAL_CLOSE);
 						top->_modal_stack_remove();
 						top->hide();
+
+						if (!top->pass_on_modal_close_click()) {
+							is_handled = true;
+						}
 					} else {
 						break;
 					}
 				}
 
+				if (is_handled) {
+					get_tree()->set_input_as_handled();
+					return;
+				}
+
 				//Matrix32 parent_xform;
 
 				/*
diff --git a/scene/resources/default_theme/default_theme.cpp b/scene/resources/default_theme/default_theme.cpp
index bb2c8750e3..f4e6c5e247 100644
--- a/scene/resources/default_theme/default_theme.cpp
+++ b/scene/resources/default_theme/default_theme.cpp
@@ -350,15 +350,15 @@ void fill_default_theme(Ref<Theme> &theme, const Ref<Font> &default_font, const
 	// CheckBox
 
 	Ref<StyleBox> cbx_empty = memnew(StyleBoxEmpty);
-	cbx_empty->set_default_margin(MARGIN_LEFT, 22 * scale);
+	cbx_empty->set_default_margin(MARGIN_LEFT, 4 * scale);
 	cbx_empty->set_default_margin(MARGIN_RIGHT, 4 * scale);
 	cbx_empty->set_default_margin(MARGIN_TOP, 4 * scale);
-	cbx_empty->set_default_margin(MARGIN_BOTTOM, 5 * scale);
+	cbx_empty->set_default_margin(MARGIN_BOTTOM, 4 * scale);
 	Ref<StyleBox> cbx_focus = focus;
 	cbx_focus->set_default_margin(MARGIN_LEFT, 4 * scale);
-	cbx_focus->set_default_margin(MARGIN_RIGHT, 22 * scale);
+	cbx_focus->set_default_margin(MARGIN_RIGHT, 4 * scale);
 	cbx_focus->set_default_margin(MARGIN_TOP, 4 * scale);
-	cbx_focus->set_default_margin(MARGIN_BOTTOM, 5 * scale);
+	cbx_focus->set_default_margin(MARGIN_BOTTOM, 4 * scale);
 
 	theme->set_stylebox("normal", "CheckBox", cbx_empty);
 	theme->set_stylebox("pressed", "CheckBox", cbx_empty);
@@ -385,7 +385,7 @@ void fill_default_theme(Ref<Theme> &theme, const Ref<Font> &default_font, const
 
 	Ref<StyleBox> cb_empty = memnew(StyleBoxEmpty);
 	cb_empty->set_default_margin(MARGIN_LEFT, 6 * scale);
-	cb_empty->set_default_margin(MARGIN_RIGHT, 70 * scale);
+	cb_empty->set_default_margin(MARGIN_RIGHT, 6 * scale);
 	cb_empty->set_default_margin(MARGIN_TOP, 4 * scale);
 	cb_empty->set_default_margin(MARGIN_BOTTOM, 4 * scale);
 
diff --git a/scene/resources/sky_box.cpp b/scene/resources/sky_box.cpp
index 2ef20f67f5..a2c3f1f111 100644
--- a/scene/resources/sky_box.cpp
+++ b/scene/resources/sky_box.cpp
@@ -180,7 +180,7 @@ Ref<Image> ProceduralSky::_generate_sky() {
 
 				normal.normalize();
 
-				float v_angle = Math::acos(normal.y);
+				float v_angle = Math::acos(CLAMP(normal.y, -1.0, 1.0));
 
 				Color color;
 
@@ -193,7 +193,7 @@ Ref<Image> ProceduralSky::_generate_sky() {
 					float c = v_angle / (Math_PI * 0.5);
 					color = sky_horizon_linear.linear_interpolate(sky_top_linear, Math::ease(1.0 - c, sky_curve));
 
-					float sun_angle = Math::rad2deg(Math::acos(sun.dot(normal)));
+					float sun_angle = Math::rad2deg(Math::acos(CLAMP(sun.dot(normal), -1.0, 1.0)));
 
 					if (sun_angle < sun_angle_min) {
 						color = color.blend(sun_color);
diff --git a/scene/resources/surface_tool.cpp b/scene/resources/surface_tool.cpp
index 352418e65c..d8600e041d 100644
--- a/scene/resources/surface_tool.cpp
+++ b/scene/resources/surface_tool.cpp
@@ -990,7 +990,7 @@ void SurfaceTool::_bind_methods() {
 
 	ClassDB::bind_method(D_METHOD("create_from", "existing", "surface"), &SurfaceTool::create_from);
 	ClassDB::bind_method(D_METHOD("append_from", "existing", "surface", "transform"), &SurfaceTool::append_from);
-	ClassDB::bind_method(D_METHOD("commit", "existing"), &SurfaceTool::commit, DEFVAL(Variant()), DEFVAL(Mesh::ARRAY_COMPRESS_DEFAULT));
+	ClassDB::bind_method(D_METHOD("commit", "existing", "flags"), &SurfaceTool::commit, DEFVAL(Variant()), DEFVAL(Mesh::ARRAY_COMPRESS_DEFAULT));
 }
 
 SurfaceTool::SurfaceTool() {
diff --git a/scene/resources/tile_set.cpp b/scene/resources/tile_set.cpp
index 1a46353fe3..bd6b917d4e 100644
--- a/scene/resources/tile_set.cpp
+++ b/scene/resources/tile_set.cpp
@@ -932,8 +932,8 @@ void TileSet::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("find_tile_by_name", "name"), &TileSet::find_tile_by_name);
 	ClassDB::bind_method(D_METHOD("get_tiles_ids"), &TileSet::_get_tiles_ids);
 
-	BIND_VMETHOD(MethodInfo("_is_tile_bound", PropertyInfo(Variant::INT, "drawn_id"), PropertyInfo(Variant::INT, "neighbor_id")));
-	BIND_VMETHOD(MethodInfo("_forward_subtile_selection", PropertyInfo(Variant::INT, "autotile_id"), PropertyInfo(Variant::INT, "bitmask"), PropertyInfo(Variant::OBJECT, "tilemap", PROPERTY_HINT_NONE, "TileMap"), PropertyInfo(Variant::VECTOR2, "tile_location")));
+	BIND_VMETHOD(MethodInfo(Variant::BOOL, "_is_tile_bound", PropertyInfo(Variant::INT, "drawn_id"), PropertyInfo(Variant::INT, "neighbor_id")));
+	BIND_VMETHOD(MethodInfo(Variant::VECTOR2, "_forward_subtile_selection", PropertyInfo(Variant::INT, "autotile_id"), PropertyInfo(Variant::INT, "bitmask"), PropertyInfo(Variant::OBJECT, "tilemap", PROPERTY_HINT_NONE, "TileMap"), PropertyInfo(Variant::VECTOR2, "tile_location")));
 
 	BIND_ENUM_CONSTANT(BITMASK_2X2);
 	BIND_ENUM_CONSTANT(BITMASK_3X3);
diff --git a/servers/physics/body_sw.cpp b/servers/physics/body_sw.cpp
index bba4d7a147..7fa7f0a45d 100644
--- a/servers/physics/body_sw.cpp
+++ b/servers/physics/body_sw.cpp
@@ -422,6 +422,18 @@ void BodySW::_compute_area_gravity_and_dampenings(const AreaSW *p_area) {
 	area_angular_damp += p_area->get_angular_damp();
 }
 
+void BodySW::set_axis_lock(PhysicsServer::BodyAxis p_axis, bool lock) { // sets or clears the p_axis bit(s) in the locked_axis mask
+	if (lock) {
+		locked_axis |= p_axis; // set the bit(s), keeping any already-locked axes
+	} else {
+		locked_axis &= ~p_axis; // clear only the requested bit(s)
+	}
+}
+
+bool BodySW::is_axis_locked(PhysicsServer::BodyAxis p_axis) const { // true when any bit of p_axis is set in locked_axis
+	return locked_axis & p_axis; // non-zero mask result converts to true
+}
+
 void BodySW::integrate_forces(real_t p_step) {
 
 	if (mode == PhysicsServer::BODY_MODE_STATIC)
@@ -559,17 +571,19 @@ void BodySW::integrate_velocities(real_t p_step) {
 	if (fi_callback)
 		get_space()->body_add_to_state_query_list(&direct_state_query_list);
 
-	//apply axis lock
-	if (locked_axis[0] || locked_axis[1] || locked_axis[2]) {
-		for (int i = 0; i < 3; i++) {
-			if (locked_axis[i]) {
-				linear_velocity[i] = 0;
-				biased_linear_velocity[i] = 0;
-				new_transform.origin[i] = get_transform().origin[i];
-			} else {
-				angular_velocity[i] = 0;
-				biased_angular_velocity[i] = 0;
-			}
+	//apply axis lock linear
+	for (int i = 0; i < 3; i++) {
+		if (is_axis_locked((PhysicsServer::BodyAxis)(1 << i))) {
+			linear_velocity[i] = 0;
+			biased_linear_velocity[i] = 0;
+			new_transform.origin[i] = get_transform().origin[i];
+		}
+	}
+	//apply axis lock angular
+	for (int i = 0; i < 3; i++) {
+		if (is_axis_locked((PhysicsServer::BodyAxis)(1 << (i + 3)))) {
+			angular_velocity[i] = 0;
+			biased_angular_velocity[i] = 0;
 		}
 	}
 
@@ -742,7 +756,8 @@ BodySW::BodySW() :
 		CollisionObjectSW(TYPE_BODY),
 		active_list(this),
 		inertia_update_list(this),
-		direct_state_query_list(this) {
+		direct_state_query_list(this),
+		locked_axis(0) {
 
 	mode = PhysicsServer::BODY_MODE_RIGID;
 	active = true;
diff --git a/servers/physics/body_sw.h b/servers/physics/body_sw.h
index aab6def1a9..b6aa76c70a 100644
--- a/servers/physics/body_sw.h
+++ b/servers/physics/body_sw.h
@@ -53,7 +53,7 @@ class BodySW : public CollisionObjectSW {
 	real_t angular_damp;
 	real_t gravity_scale;
 
-	bool locked_axis[3] = { false, false, false };
+	uint16_t locked_axis;
 
 	real_t kinematic_safe_margin;
 	real_t _inv_mass;
@@ -288,8 +288,8 @@ public:
 	_FORCE_INLINE_ Vector3 get_gravity() const { return gravity; }
 	_FORCE_INLINE_ real_t get_bounce() const { return bounce; }
 
-	_FORCE_INLINE_ void set_axis_lock(int axis, bool lock) { locked_axis[axis] = lock; }
-	_FORCE_INLINE_ bool get_axis_lock() const { return locked_axis; }
+	void set_axis_lock(PhysicsServer::BodyAxis p_axis, bool lock);
+	bool is_axis_locked(PhysicsServer::BodyAxis p_axis) const;
 
 	void integrate_forces(real_t p_step);
 	void integrate_velocities(real_t p_step);
diff --git a/servers/physics/physics_server_sw.cpp b/servers/physics/physics_server_sw.cpp
index 2909308366..0a1d524839 100644
--- a/servers/physics/physics_server_sw.cpp
+++ b/servers/physics/physics_server_sw.cpp
@@ -794,20 +794,20 @@ void PhysicsServerSW::body_set_axis_velocity(RID p_body, const Vector3 &p_axis_v
 	body->wakeup();
 };
 
-void PhysicsServerSW::body_set_axis_lock(RID p_body, int axis, bool lock) {
+void PhysicsServerSW::body_set_axis_lock(RID p_body, BodyAxis p_axis, bool lock) {
 
 	BodySW *body = body_owner.get(p_body);
 	ERR_FAIL_COND(!body);
 
-	body->set_axis_lock(axis, lock);
+	body->set_axis_lock(p_axis, lock);
 	body->wakeup();
 }
 
-bool PhysicsServerSW::body_get_axis_lock(RID p_body) const {
+bool PhysicsServerSW::body_is_axis_locked(RID p_body, BodyAxis p_axis) const {
 
 	const BodySW *body = body_owner.get(p_body);
 	ERR_FAIL_COND_V(!body, 0);
-	return body->get_axis_lock();
+	return body->is_axis_locked(p_axis);
 }
 
 void PhysicsServerSW::body_add_collision_exception(RID p_body, RID p_body_b) {
diff --git a/servers/physics/physics_server_sw.h b/servers/physics/physics_server_sw.h
index fea6e34ebd..71547f24c8 100644
--- a/servers/physics/physics_server_sw.h
+++ b/servers/physics/physics_server_sw.h
@@ -203,8 +203,8 @@ public:
 	virtual void body_apply_torque_impulse(RID p_body, const Vector3 &p_impulse);
 	virtual void body_set_axis_velocity(RID p_body, const Vector3 &p_axis_velocity);
 
-	virtual void body_set_axis_lock(RID p_body, int axis, bool p_lock);
-	virtual bool body_get_axis_lock(RID p_body) const;
+	virtual void body_set_axis_lock(RID p_body, BodyAxis p_axis, bool p_lock);
+	virtual bool body_is_axis_locked(RID p_body, BodyAxis p_axis) const;
 
 	virtual void body_add_collision_exception(RID p_body, RID p_body_b);
 	virtual void body_remove_collision_exception(RID p_body, RID p_body_b);
diff --git a/servers/physics_server.cpp b/servers/physics_server.cpp
index 9a9b20bf28..2aef12f04c 100644
--- a/servers/physics_server.cpp
+++ b/servers/physics_server.cpp
@@ -473,7 +473,7 @@ void PhysicsServer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("body_set_axis_velocity", "body", "axis_velocity"), &PhysicsServer::body_set_axis_velocity);
 
 	ClassDB::bind_method(D_METHOD("body_set_axis_lock", "body", "axis", "lock"), &PhysicsServer::body_set_axis_lock);
-	ClassDB::bind_method(D_METHOD("body_get_axis_lock", "body"), &PhysicsServer::body_get_axis_lock);
+	ClassDB::bind_method(D_METHOD("body_is_axis_locked", "body", "axis"), &PhysicsServer::body_is_axis_locked);
 
 	ClassDB::bind_method(D_METHOD("body_add_collision_exception", "body", "excepted_body"), &PhysicsServer::body_add_collision_exception);
 	ClassDB::bind_method(D_METHOD("body_remove_collision_exception", "body", "excepted_body"), &PhysicsServer::body_remove_collision_exception);
@@ -702,6 +702,12 @@ void PhysicsServer::_bind_methods() {
 	BIND_ENUM_CONSTANT(SPACE_PARAM_BODY_TIME_TO_SLEEP);
 	BIND_ENUM_CONSTANT(SPACE_PARAM_BODY_ANGULAR_VELOCITY_DAMP_RATIO);
 	BIND_ENUM_CONSTANT(SPACE_PARAM_CONSTRAINT_DEFAULT_BIAS);
+	BIND_ENUM_CONSTANT(BODY_AXIS_LINEAR_X);
+	BIND_ENUM_CONSTANT(BODY_AXIS_LINEAR_Y);
+	BIND_ENUM_CONSTANT(BODY_AXIS_LINEAR_Z);
+	BIND_ENUM_CONSTANT(BODY_AXIS_ANGULAR_X);
+	BIND_ENUM_CONSTANT(BODY_AXIS_ANGULAR_Y);
+	BIND_ENUM_CONSTANT(BODY_AXIS_ANGULAR_Z);
 }
 
 PhysicsServer::PhysicsServer() {
diff --git a/servers/physics_server.h b/servers/physics_server.h
index 66c3a0afc4..341d02eb87 100644
--- a/servers/physics_server.h
+++ b/servers/physics_server.h
@@ -421,8 +421,17 @@ public:
 	virtual void body_apply_torque_impulse(RID p_body, const Vector3 &p_impulse) = 0;
 	virtual void body_set_axis_velocity(RID p_body, const Vector3 &p_axis_velocity) = 0;
 
-	virtual void body_set_axis_lock(RID p_body, int axis, bool lock) = 0;
-	virtual bool body_get_axis_lock(RID p_body) const = 0;
+	enum BodyAxis { // single-bit flags accepted by body_set_axis_lock / body_is_axis_locked
+		BODY_AXIS_LINEAR_X = 1 << 0, // bits 0-2: linear axes
+		BODY_AXIS_LINEAR_Y = 1 << 1,
+		BODY_AXIS_LINEAR_Z = 1 << 2,
+		BODY_AXIS_ANGULAR_X = 1 << 3, // bits 3-5: angular axes
+		BODY_AXIS_ANGULAR_Y = 1 << 4,
+		BODY_AXIS_ANGULAR_Z = 1 << 5
+	};
+
+	virtual void body_set_axis_lock(RID p_body, BodyAxis p_axis, bool p_lock) = 0;
+	virtual bool body_is_axis_locked(RID p_body, BodyAxis p_axis) const = 0;
 
 	//fix
 	virtual void body_add_collision_exception(RID p_body, RID p_body_b) = 0;
@@ -685,6 +694,7 @@ VARIANT_ENUM_CAST(PhysicsServer::AreaSpaceOverrideMode);
 VARIANT_ENUM_CAST(PhysicsServer::BodyMode);
 VARIANT_ENUM_CAST(PhysicsServer::BodyParameter);
 VARIANT_ENUM_CAST(PhysicsServer::BodyState);
+VARIANT_ENUM_CAST(PhysicsServer::BodyAxis);
 VARIANT_ENUM_CAST(PhysicsServer::PinJointParam);
 VARIANT_ENUM_CAST(PhysicsServer::JointType);
 VARIANT_ENUM_CAST(PhysicsServer::HingeJointParam);
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 7d2009cdd9..dd931b2fcb 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -183,7 +183,7 @@ TODO.
 ## libwebp
 
 - Upstream: https://chromium.googlesource.com/webm/libwebp/
-- Version: 0.6.0
+- Version: 0.6.1
 - License: BSD-3-Clause
 
 Files extracted from upstream source:
diff --git a/thirdparty/libwebp/dsp/argb.c b/thirdparty/libwebp/dsp/argb.c
deleted file mode 100644
index cc1f9a96c3..0000000000
--- a/thirdparty/libwebp/dsp/argb.c
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2014 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-//   ARGB making functions.
-//
-// Author: Djordje Pesut (djordje.pesut@imgtec.com)
-
-#include "./dsp.h"
-
-static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
-  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
-}
-
-static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
-                     const uint8_t* b, int len, uint32_t* out) {
-  int i;
-  for (i = 0; i < len; ++i) {
-    out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
-  }
-}
-
-static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
-                    int len, int step, uint32_t* out) {
-  int i, offset = 0;
-  for (i = 0; i < len; ++i) {
-    out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
-    offset += step;
-  }
-}
-
-void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*,
-                    const uint8_t*, int, uint32_t*);
-void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*,
-                   int, int, uint32_t*);
-
-extern void VP8EncDspARGBInitMIPSdspR2(void);
-extern void VP8EncDspARGBInitSSE2(void);
-
-static volatile VP8CPUInfo argb_last_cpuinfo_used =
-    (VP8CPUInfo)&argb_last_cpuinfo_used;
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
-  if (argb_last_cpuinfo_used == VP8GetCPUInfo) return;
-
-  VP8PackARGB = PackARGB;
-  VP8PackRGB = PackRGB;
-
-  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-  if (VP8GetCPUInfo != NULL) {
-#if defined(WEBP_USE_SSE2)
-    if (VP8GetCPUInfo(kSSE2)) {
-      VP8EncDspARGBInitSSE2();
-    }
-#endif
-#if defined(WEBP_USE_MIPS_DSP_R2)
-    if (VP8GetCPUInfo(kMIPSdspR2)) {
-      VP8EncDspARGBInitMIPSdspR2();
-    }
-#endif
-  }
-  argb_last_cpuinfo_used = VP8GetCPUInfo;
-}
diff --git a/thirdparty/libwebp/dsp/argb_mips_dsp_r2.c b/thirdparty/libwebp/dsp/argb_mips_dsp_r2.c
deleted file mode 100644
index af65acb8ff..0000000000
--- a/thirdparty/libwebp/dsp/argb_mips_dsp_r2.c
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright 2014 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-//   ARGB making functions (mips version).
-//
-// Author: Djordje Pesut (djordje.pesut@imgtec.com)
-
-#include "./dsp.h"
-
-#if defined(WEBP_USE_MIPS_DSP_R2)
-
-static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
-                     const uint8_t* b, int len, uint32_t* out) {
-  int temp0, temp1, temp2, temp3, offset;
-  const int rest = len & 1;
-  const uint32_t* const loop_end = out + len - rest;
-  const int step = 4;
-  __asm__ volatile (
-    "xor          %[offset],   %[offset], %[offset]    \n\t"
-    "beq          %[loop_end], %[out],    0f           \n\t"
-  "2:                                                  \n\t"
-    "lbux         %[temp0],    %[offset](%[a])         \n\t"
-    "lbux         %[temp1],    %[offset](%[r])         \n\t"
-    "lbux         %[temp2],    %[offset](%[g])         \n\t"
-    "lbux         %[temp3],    %[offset](%[b])         \n\t"
-    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
-    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
-    "addiu        %[out],      %[out],    4            \n\t"
-    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
-    "sw           %[temp0],    -4(%[out])              \n\t"
-    "addu         %[offset],   %[offset], %[step]      \n\t"
-    "bne          %[loop_end], %[out],    2b           \n\t"
-  "0:                                                  \n\t"
-    "beq          %[rest],     $zero,     1f           \n\t"
-    "lbux         %[temp0],    %[offset](%[a])         \n\t"
-    "lbux         %[temp1],    %[offset](%[r])         \n\t"
-    "lbux         %[temp2],    %[offset](%[g])         \n\t"
-    "lbux         %[temp3],    %[offset](%[b])         \n\t"
-    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
-    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
-    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
-    "sw           %[temp0],    0(%[out])               \n\t"
-  "1:                                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
-    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
-      [loop_end]"r"(loop_end), [rest]"r"(rest)
-    : "memory"
-  );
-}
-
-static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
-                    int len, int step, uint32_t* out) {
-  int temp0, temp1, temp2, offset;
-  const int rest = len & 1;
-  const int a = 0xff;
-  const uint32_t* const loop_end = out + len - rest;
-  __asm__ volatile (
-    "xor          %[offset],   %[offset], %[offset]    \n\t"
-    "beq          %[loop_end], %[out],    0f           \n\t"
-  "2:                                                  \n\t"
-    "lbux         %[temp0],    %[offset](%[r])         \n\t"
-    "lbux         %[temp1],    %[offset](%[g])         \n\t"
-    "lbux         %[temp2],    %[offset](%[b])         \n\t"
-    "ins          %[temp0],    %[a],      16,     16   \n\t"
-    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
-    "addiu        %[out],      %[out],    4            \n\t"
-    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
-    "sw           %[temp0],    -4(%[out])              \n\t"
-    "addu         %[offset],   %[offset], %[step]      \n\t"
-    "bne          %[loop_end], %[out],    2b           \n\t"
-  "0:                                                  \n\t"
-    "beq          %[rest],     $zero,     1f           \n\t"
-    "lbux         %[temp0],    %[offset](%[r])         \n\t"
-    "lbux         %[temp1],    %[offset](%[g])         \n\t"
-    "lbux         %[temp2],    %[offset](%[b])         \n\t"
-    "ins          %[temp0],    %[a],      16,     16   \n\t"
-    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
-    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
-    "sw           %[temp0],    0(%[out])               \n\t"
-  "1:                                                  \n\t"
-    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
-      [offset]"=&r"(offset), [out]"+&r"(out)
-    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
-      [loop_end]"r"(loop_end), [rest]"r"(rest)
-    : "memory"
-  );
-}
-
-//------------------------------------------------------------------------------
-// Entry point
-
-extern void VP8EncDspARGBInitMIPSdspR2(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
-  VP8PackARGB = PackARGB;
-  VP8PackRGB = PackRGB;
-}
-
-#else  // !WEBP_USE_MIPS_DSP_R2
-
-WEBP_DSP_INIT_STUB(VP8EncDspARGBInitMIPSdspR2)
-
-#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/dsp/argb_sse2.c b/thirdparty/libwebp/dsp/argb_sse2.c
deleted file mode 100644
index afcb1957e7..0000000000
--- a/thirdparty/libwebp/dsp/argb_sse2.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2014 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-//   ARGB making functions (SSE2 version).
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#include "./dsp.h"
-
-#if defined(WEBP_USE_SSE2)
-
-#include <assert.h>
-#include <emmintrin.h>
-#include <string.h>
-
-static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
-  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
-}
-
-static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
-                     const uint8_t* b, int len, uint32_t* out) {
-  if (g == r + 1) {  // RGBA input order. Need to swap R and B.
-    int i = 0;
-    const int len_max = len & ~3;  // max length processed in main loop
-    const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
-    assert(b == r + 2);
-    assert(a == r + 3);
-    for (; i < len_max; i += 4) {
-      const __m128i A = _mm_loadu_si128((const __m128i*)(r + 4 * i));
-      const __m128i B = _mm_and_si128(A, red_blue_mask);     // R 0 B 0
-      const __m128i C = _mm_andnot_si128(red_blue_mask, A);  // 0 G 0 A
-      const __m128i D = _mm_shufflelo_epi16(B, _MM_SHUFFLE(2, 3, 0, 1));
-      const __m128i E = _mm_shufflehi_epi16(D, _MM_SHUFFLE(2, 3, 0, 1));
-      const __m128i F = _mm_or_si128(E, C);
-      _mm_storeu_si128((__m128i*)(out + i), F);
-    }
-    for (; i < len; ++i) {
-      out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
-    }
-  } else {
-    assert(g == b + 1);
-    assert(r == b + 2);
-    assert(a == b + 3);
-    memcpy(out, b, len * 4);
-  }
-}
-
-//------------------------------------------------------------------------------
-// Entry point
-
-extern void VP8EncDspARGBInitSSE2(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) {
-  VP8PackARGB = PackARGB;
-}
-
-#else  // !WEBP_USE_SSE2
-
-WEBP_DSP_INIT_STUB(VP8EncDspARGBInitSSE2)
-
-#endif  // WEBP_USE_SSE2
diff --git a/thirdparty/libwebp/enc/backward_references_enc.c b/thirdparty/libwebp/enc/backward_references_enc.c
deleted file mode 100644
index 7c0559ff1e..0000000000
--- a/thirdparty/libwebp/enc/backward_references_enc.c
+++ /dev/null
@@ -1,1800 +0,0 @@
-// Copyright 2012 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Author: Jyrki Alakuijala (jyrki@google.com)
-//
-
-#include <assert.h>
-#include <math.h>
-
-#include "./backward_references_enc.h"
-#include "./histogram_enc.h"
-#include "../dsp/lossless.h"
-#include "../dsp/lossless_common.h"
-#include "../dsp/dsp.h"
-#include "../utils/color_cache_utils.h"
-#include "../utils/utils.h"
-
-#define VALUES_IN_BYTE 256
-
-#define MIN_BLOCK_SIZE 256  // minimum block size for backward references
-
-#define MAX_ENTROPY    (1e30f)
-
-// 1M window (4M bytes) minus 120 special codes for short distances.
-#define WINDOW_SIZE_BITS 20
-#define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120)
-
-// Minimum number of pixels for which it is cheaper to encode a
-// distance + length instead of each pixel as a literal.
-#define MIN_LENGTH 4
-// If you change this, you need MAX_LENGTH_BITS + WINDOW_SIZE_BITS <= 32 as it
-// is used in VP8LHashChain.
-#define MAX_LENGTH_BITS 12
-// We want the max value to be attainable and stored in MAX_LENGTH_BITS bits.
-#define MAX_LENGTH ((1 << MAX_LENGTH_BITS) - 1)
-#if MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32
-#error "MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32"
-#endif
-
-// -----------------------------------------------------------------------------
-
-static const uint8_t plane_to_code_lut[128] = {
- 96,   73,  55,  39,  23,  13,   5,  1,  255, 255, 255, 255, 255, 255, 255, 255,
- 101,  78,  58,  42,  26,  16,   8,  2,    0,   3,  9,   17,  27,  43,  59,  79,
- 102,  86,  62,  46,  32,  20,  10,  6,    4,   7,  11,  21,  33,  47,  63,  87,
- 105,  90,  70,  52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53,  71,  91,
- 110,  99,  82,  66,  48,  35,  30,  24,  22,  25,  31,  36,  49,  67,  83, 100,
- 115, 108,  94,  76,  64,  50,  44,  40,  34,  41,  45,  51,  65,  77,  95, 109,
- 118, 113, 103,  92,  80,  68,  60,  56,  54,  57,  61,  69,  81,  93, 104, 114,
- 119, 116, 111, 106,  97,  88,  84,  74,  72,  75,  85,  89,  98, 107, 112, 117
-};
-
-static int DistanceToPlaneCode(int xsize, int dist) {
-  const int yoffset = dist / xsize;
-  const int xoffset = dist - yoffset * xsize;
-  if (xoffset <= 8 && yoffset < 8) {
-    return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1;
-  } else if (xoffset > xsize - 8 && yoffset < 7) {
-    return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1;
-  }
-  return dist + 120;
-}
-
-// Returns the exact index where array1 and array2 are different. For an index
-// inferior or equal to best_len_match, the return value just has to be strictly
-// inferior to best_len_match. The current behavior is to return 0 if this index
-// is best_len_match, and the index itself otherwise.
-// If no two elements are the same, it returns max_limit.
-static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
-                                       const uint32_t* const array2,
-                                       int best_len_match, int max_limit) {
-  // Before 'expensive' linear match, check if the two arrays match at the
-  // current best length index.
-  if (array1[best_len_match] != array2[best_len_match]) return 0;
-
-  return VP8LVectorMismatch(array1, array2, max_limit);
-}
-
-// -----------------------------------------------------------------------------
-//  VP8LBackwardRefs
-
-struct PixOrCopyBlock {
-  PixOrCopyBlock* next_;   // next block (or NULL)
-  PixOrCopy* start_;       // data start
-  int size_;               // currently used size
-};
-
-static void ClearBackwardRefs(VP8LBackwardRefs* const refs) {
-  assert(refs != NULL);
-  if (refs->tail_ != NULL) {
-    *refs->tail_ = refs->free_blocks_;  // recycle all blocks at once
-  }
-  refs->free_blocks_ = refs->refs_;
-  refs->tail_ = &refs->refs_;
-  refs->last_block_ = NULL;
-  refs->refs_ = NULL;
-}
-
-void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
-  assert(refs != NULL);
-  ClearBackwardRefs(refs);
-  while (refs->free_blocks_ != NULL) {
-    PixOrCopyBlock* const next = refs->free_blocks_->next_;
-    WebPSafeFree(refs->free_blocks_);
-    refs->free_blocks_ = next;
-  }
-}
-
-void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
-  assert(refs != NULL);
-  memset(refs, 0, sizeof(*refs));
-  refs->tail_ = &refs->refs_;
-  refs->block_size_ =
-      (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
-}
-
-VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
-  VP8LRefsCursor c;
-  c.cur_block_ = refs->refs_;
-  if (refs->refs_ != NULL) {
-    c.cur_pos = c.cur_block_->start_;
-    c.last_pos_ = c.cur_pos + c.cur_block_->size_;
-  } else {
-    c.cur_pos = NULL;
-    c.last_pos_ = NULL;
-  }
-  return c;
-}
-
-void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
-  PixOrCopyBlock* const b = c->cur_block_->next_;
-  c->cur_pos = (b == NULL) ? NULL : b->start_;
-  c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
-  c->cur_block_ = b;
-}
-
-// Create a new block, either from the free list or allocated
-static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
-  PixOrCopyBlock* b = refs->free_blocks_;
-  if (b == NULL) {   // allocate new memory chunk
-    const size_t total_size =
-        sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
-    b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
-    if (b == NULL) {
-      refs->error_ |= 1;
-      return NULL;
-    }
-    b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
-  } else {  // recycle from free-list
-    refs->free_blocks_ = b->next_;
-  }
-  *refs->tail_ = b;
-  refs->tail_ = &b->next_;
-  refs->last_block_ = b;
-  b->next_ = NULL;
-  b->size_ = 0;
-  return b;
-}
-
-static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
-                                              const PixOrCopy v) {
-  PixOrCopyBlock* b = refs->last_block_;
-  if (b == NULL || b->size_ == refs->block_size_) {
-    b = BackwardRefsNewBlock(refs);
-    if (b == NULL) return;   // refs->error_ is set
-  }
-  b->start_[b->size_++] = v;
-}
-
-int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
-                         VP8LBackwardRefs* const dst) {
-  const PixOrCopyBlock* b = src->refs_;
-  ClearBackwardRefs(dst);
-  assert(src->block_size_ == dst->block_size_);
-  while (b != NULL) {
-    PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst);
-    if (new_b == NULL) return 0;   // dst->error_ is set
-    memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_));
-    new_b->size_ = b->size_;
-    b = b->next_;
-  }
-  return 1;
-}
-
-// -----------------------------------------------------------------------------
-// Hash chains
-
-int VP8LHashChainInit(VP8LHashChain* const p, int size) {
-  assert(p->size_ == 0);
-  assert(p->offset_length_ == NULL);
-  assert(size > 0);
-  p->offset_length_ =
-      (uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length_));
-  if (p->offset_length_ == NULL) return 0;
-  p->size_ = size;
-
-  return 1;
-}
-
-void VP8LHashChainClear(VP8LHashChain* const p) {
-  assert(p != NULL);
-  WebPSafeFree(p->offset_length_);
-
-  p->size_ = 0;
-  p->offset_length_ = NULL;
-}
-
-// -----------------------------------------------------------------------------
-
-#define HASH_MULTIPLIER_HI (0xc6a4a793ULL)
-#define HASH_MULTIPLIER_LO (0x5bd1e996ULL)
-
-static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) {
-  uint32_t key;
-  key  = (argb[1] * HASH_MULTIPLIER_HI) & 0xffffffffu;
-  key += (argb[0] * HASH_MULTIPLIER_LO) & 0xffffffffu;
-  key = key >> (32 - HASH_BITS);
-  return key;
-}
-
-// Returns the maximum number of hash chain lookups to do for a
-// given compression quality. Return value in range [8, 86].
-static int GetMaxItersForQuality(int quality) {
-  return 8 + (quality * quality) / 128;
-}
-
-static int GetWindowSizeForHashChain(int quality, int xsize) {
-  const int max_window_size = (quality > 75) ? WINDOW_SIZE
-                            : (quality > 50) ? (xsize << 8)
-                            : (quality > 25) ? (xsize << 6)
-                            : (xsize << 4);
-  assert(xsize > 0);
-  return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size;
-}
-
-static WEBP_INLINE int MaxFindCopyLength(int len) {
-  return (len < MAX_LENGTH) ? len : MAX_LENGTH;
-}
-
-int VP8LHashChainFill(VP8LHashChain* const p, int quality,
-                      const uint32_t* const argb, int xsize, int ysize,
-                      int low_effort) {
-  const int size = xsize * ysize;
-  const int iter_max = GetMaxItersForQuality(quality);
-  const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize);
-  int pos;
-  int argb_comp;
-  uint32_t base_position;
-  int32_t* hash_to_first_index;
-  // Temporarily use the p->offset_length_ as a hash chain.
-  int32_t* chain = (int32_t*)p->offset_length_;
-  assert(size > 0);
-  assert(p->size_ != 0);
-  assert(p->offset_length_ != NULL);
-
-  if (size <= 2) {
-    p->offset_length_[0] = p->offset_length_[size - 1] = 0;
-    return 1;
-  }
-
-  hash_to_first_index =
-      (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
-  if (hash_to_first_index == NULL) return 0;
-
-  // Set the int32_t array to -1.
-  memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index));
-  // Fill the chain linking pixels with the same hash.
-  argb_comp = (argb[0] == argb[1]);
-  for (pos = 0; pos < size - 2;) {
-    uint32_t hash_code;
-    const int argb_comp_next = (argb[pos + 1] == argb[pos + 2]);
-    if (argb_comp && argb_comp_next) {
-      // Consecutive pixels with the same color will share the same hash.
-      // We therefore use a different hash: the color and its repetition
-      // length.
-      uint32_t tmp[2];
-      uint32_t len = 1;
-      tmp[0] = argb[pos];
-      // Figure out how far the pixels are the same.
-      // The last pixel has a different 64 bit hash, as its next pixel does
-      // not have the same color, so we just need to get to the last pixel equal
-      // to its follower.
-      while (pos + (int)len + 2 < size && argb[pos + len + 2] == argb[pos]) {
-        ++len;
-      }
-      if (len > MAX_LENGTH) {
-        // Skip the pixels that match for distance=1 and length>MAX_LENGTH
-        // because they are linked to their predecessor and we automatically
-        // check that in the main for loop below. Skipping means setting no
-        // predecessor in the chain, hence -1.
-        memset(chain + pos, 0xff, (len - MAX_LENGTH) * sizeof(*chain));
-        pos += len - MAX_LENGTH;
-        len = MAX_LENGTH;
-      }
-      // Process the rest of the hash chain.
-      while (len) {
-        tmp[1] = len--;
-        hash_code = GetPixPairHash64(tmp);
-        chain[pos] = hash_to_first_index[hash_code];
-        hash_to_first_index[hash_code] = pos++;
-      }
-      argb_comp = 0;
-    } else {
-      // Just move one pixel forward.
-      hash_code = GetPixPairHash64(argb + pos);
-      chain[pos] = hash_to_first_index[hash_code];
-      hash_to_first_index[hash_code] = pos++;
-      argb_comp = argb_comp_next;
-    }
-  }
-  // Process the penultimate pixel.
-  chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)];
-
-  WebPSafeFree(hash_to_first_index);
-
-  // Find the best match interval at each pixel, defined by an offset to the
-  // pixel and a length. The right-most pixel cannot match anything to the right
-  // (hence a best length of 0) and the left-most pixel nothing to the left
-  // (hence an offset of 0).
-  assert(size > 2);
-  p->offset_length_[0] = p->offset_length_[size - 1] = 0;
-  for (base_position = size - 2; base_position > 0;) {
-    const int max_len = MaxFindCopyLength(size - 1 - base_position);
-    const uint32_t* const argb_start = argb + base_position;
-    int iter = iter_max;
-    int best_length = 0;
-    uint32_t best_distance = 0;
-    uint32_t best_argb;
-    const int min_pos =
-        (base_position > window_size) ? base_position - window_size : 0;
-    const int length_max = (max_len < 256) ? max_len : 256;
-    uint32_t max_base_position;
-
-    pos = chain[base_position];
-    if (!low_effort) {
-      int curr_length;
-      // Heuristic: use the comparison with the above line as an initialization.
-      if (base_position >= (uint32_t)xsize) {
-        curr_length = FindMatchLength(argb_start - xsize, argb_start,
-                                      best_length, max_len);
-        if (curr_length > best_length) {
-          best_length = curr_length;
-          best_distance = xsize;
-        }
-        --iter;
-      }
-      // Heuristic: compare to the previous pixel.
-      curr_length =
-          FindMatchLength(argb_start - 1, argb_start, best_length, max_len);
-      if (curr_length > best_length) {
-        best_length = curr_length;
-        best_distance = 1;
-      }
-      --iter;
-      // Skip the for loop if we already have the maximum.
-      if (best_length == MAX_LENGTH) pos = min_pos - 1;
-    }
-    best_argb = argb_start[best_length];
-
-    for (; pos >= min_pos && --iter; pos = chain[pos]) {
-      int curr_length;
-      assert(base_position > (uint32_t)pos);
-
-      if (argb[pos + best_length] != best_argb) continue;
-
-      curr_length = VP8LVectorMismatch(argb + pos, argb_start, max_len);
-      if (best_length < curr_length) {
-        best_length = curr_length;
-        best_distance = base_position - pos;
-        best_argb = argb_start[best_length];
-        // Stop if we have reached a good enough length.
-        if (best_length >= length_max) break;
-      }
-    }
-    // We have the best match but in case the two intervals continue matching
-    // to the left, we have the best matches for the left-extended pixels.
-    max_base_position = base_position;
-    while (1) {
-      assert(best_length <= MAX_LENGTH);
-      assert(best_distance <= WINDOW_SIZE);
-      p->offset_length_[base_position] =
-          (best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length;
-      --base_position;
-      // Stop if we don't have a match or if we are out of bounds.
-      if (best_distance == 0 || base_position == 0) break;
-      // Stop if we cannot extend the matching intervals to the left.
-      if (base_position < best_distance ||
-          argb[base_position - best_distance] != argb[base_position]) {
-        break;
-      }
-      // Stop if we are matching at its limit because there could be a closer
-      // matching interval with the same maximum length. Then again, if the
-      // matching interval is as close as possible (best_distance == 1), we will
-      // never find anything better so let's continue.
-      if (best_length == MAX_LENGTH && best_distance != 1 &&
-          base_position + MAX_LENGTH < max_base_position) {
-        break;
-      }
-      if (best_length < MAX_LENGTH) {
-        ++best_length;
-        max_base_position = base_position;
-      }
-    }
-  }
-  return 1;
-}
-
-static WEBP_INLINE int HashChainFindOffset(const VP8LHashChain* const p,
-                                           const int base_position) {
-  return p->offset_length_[base_position] >> MAX_LENGTH_BITS;
-}
-
-static WEBP_INLINE int HashChainFindLength(const VP8LHashChain* const p,
-                                           const int base_position) {
-  return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1);
-}
-
-static WEBP_INLINE void HashChainFindCopy(const VP8LHashChain* const p,
-                                          int base_position,
-                                          int* const offset_ptr,
-                                          int* const length_ptr) {
-  *offset_ptr = HashChainFindOffset(p, base_position);
-  *length_ptr = HashChainFindLength(p, base_position);
-}
-
-static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
-                                         VP8LColorCache* const hashers,
-                                         VP8LBackwardRefs* const refs) {
-  PixOrCopy v;
-  if (use_color_cache) {
-    const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel);
-    if (VP8LColorCacheLookup(hashers, key) == pixel) {
-      v = PixOrCopyCreateCacheIdx(key);
-    } else {
-      v = PixOrCopyCreateLiteral(pixel);
-      VP8LColorCacheSet(hashers, key, pixel);
-    }
-  } else {
-    v = PixOrCopyCreateLiteral(pixel);
-  }
-  BackwardRefsCursorAdd(refs, v);
-}
-
-static int BackwardReferencesRle(int xsize, int ysize,
-                                 const uint32_t* const argb,
-                                 int cache_bits, VP8LBackwardRefs* const refs) {
-  const int pix_count = xsize * ysize;
-  int i, k;
-  const int use_color_cache = (cache_bits > 0);
-  VP8LColorCache hashers;
-
-  if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) {
-    return 0;
-  }
-  ClearBackwardRefs(refs);
-  // Add first pixel as literal.
-  AddSingleLiteral(argb[0], use_color_cache, &hashers, refs);
-  i = 1;
-  while (i < pix_count) {
-    const int max_len = MaxFindCopyLength(pix_count - i);
-    const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len);
-    const int prev_row_len = (i < xsize) ? 0 :
-        FindMatchLength(argb + i, argb + i - xsize, 0, max_len);
-    if (rle_len >= prev_row_len && rle_len >= MIN_LENGTH) {
-      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len));
-      // We don't need to update the color cache here since it is always the
-      // same pixel being copied, and that does not change the color cache
-      // state.
-      i += rle_len;
-    } else if (prev_row_len >= MIN_LENGTH) {
-      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len));
-      if (use_color_cache) {
-        for (k = 0; k < prev_row_len; ++k) {
-          VP8LColorCacheInsert(&hashers, argb[i + k]);
-        }
-      }
-      i += prev_row_len;
-    } else {
-      AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
-      i++;
-    }
-  }
-  if (use_color_cache) VP8LColorCacheClear(&hashers);
-  return !refs->error_;
-}
-
-static int BackwardReferencesLz77(int xsize, int ysize,
-                                  const uint32_t* const argb, int cache_bits,
-                                  const VP8LHashChain* const hash_chain,
-                                  VP8LBackwardRefs* const refs) {
-  int i;
-  int i_last_check = -1;
-  int ok = 0;
-  int cc_init = 0;
-  const int use_color_cache = (cache_bits > 0);
-  const int pix_count = xsize * ysize;
-  VP8LColorCache hashers;
-
-  if (use_color_cache) {
-    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
-    if (!cc_init) goto Error;
-  }
-  ClearBackwardRefs(refs);
-  for (i = 0; i < pix_count;) {
-    // Alternative#1: Code the pixels starting at 'i' using backward reference.
-    int offset = 0;
-    int len = 0;
-    int j;
-    HashChainFindCopy(hash_chain, i, &offset, &len);
-    if (len >= MIN_LENGTH) {
-      const int len_ini = len;
-      int max_reach = 0;
-      assert(i + len < pix_count);
-      // Only start from what we have not checked already.
-      i_last_check = (i > i_last_check) ? i : i_last_check;
-      // We know the best match for the current pixel but we try to find the
-      // best matches for the current pixel AND the next one combined.
-      // The naive method would use the intervals:
-      // [i,i+len) + [i+len, length of best match at i+len)
-      // while we check if we can use:
-      // [i,j) (where j<=i+len) + [j, length of best match at j)
-      for (j = i_last_check + 1; j <= i + len_ini; ++j) {
-        const int len_j = HashChainFindLength(hash_chain, j);
-        const int reach =
-            j + (len_j >= MIN_LENGTH ? len_j : 1);  // 1 for single literal.
-        if (reach > max_reach) {
-          len = j - i;
-          max_reach = reach;
-        }
-      }
-    } else {
-      len = 1;
-    }
-    // Go with literal or backward reference.
-    assert(len > 0);
-    if (len == 1) {
-      AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
-    } else {
-      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
-      if (use_color_cache) {
-        for (j = i; j < i + len; ++j) VP8LColorCacheInsert(&hashers, argb[j]);
-      }
-    }
-    i += len;
-  }
-
-  ok = !refs->error_;
- Error:
-  if (cc_init) VP8LColorCacheClear(&hashers);
-  return ok;
-}
-
-// -----------------------------------------------------------------------------
-
-typedef struct {
-  double alpha_[VALUES_IN_BYTE];
-  double red_[VALUES_IN_BYTE];
-  double blue_[VALUES_IN_BYTE];
-  double distance_[NUM_DISTANCE_CODES];
-  double* literal_;
-} CostModel;
-
-static int BackwardReferencesTraceBackwards(
-    int xsize, int ysize, const uint32_t* const argb, int quality,
-    int cache_bits, const VP8LHashChain* const hash_chain,
-    VP8LBackwardRefs* const refs);
-
-static void ConvertPopulationCountTableToBitEstimates(
-    int num_symbols, const uint32_t population_counts[], double output[]) {
-  uint32_t sum = 0;
-  int nonzeros = 0;
-  int i;
-  for (i = 0; i < num_symbols; ++i) {
-    sum += population_counts[i];
-    if (population_counts[i] > 0) {
-      ++nonzeros;
-    }
-  }
-  if (nonzeros <= 1) {
-    memset(output, 0, num_symbols * sizeof(*output));
-  } else {
-    const double logsum = VP8LFastLog2(sum);
-    for (i = 0; i < num_symbols; ++i) {
-      output[i] = logsum - VP8LFastLog2(population_counts[i]);
-    }
-  }
-}
-
-static int CostModelBuild(CostModel* const m, int cache_bits,
-                          VP8LBackwardRefs* const refs) {
-  int ok = 0;
-  VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
-  if (histo == NULL) goto Error;
-
-  VP8LHistogramCreate(histo, refs, cache_bits);
-
-  ConvertPopulationCountTableToBitEstimates(
-      VP8LHistogramNumCodes(histo->palette_code_bits_),
-      histo->literal_, m->literal_);
-  ConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo->red_, m->red_);
-  ConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo->blue_, m->blue_);
-  ConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo->alpha_, m->alpha_);
-  ConvertPopulationCountTableToBitEstimates(
-      NUM_DISTANCE_CODES, histo->distance_, m->distance_);
-  ok = 1;
-
- Error:
-  VP8LFreeHistogram(histo);
-  return ok;
-}
-
-static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
-  return m->alpha_[v >> 24] +
-         m->red_[(v >> 16) & 0xff] +
-         m->literal_[(v >> 8) & 0xff] +
-         m->blue_[v & 0xff];
-}
-
-static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
-  const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
-  return m->literal_[literal_idx];
-}
-
-static WEBP_INLINE double GetLengthCost(const CostModel* const m,
-                                        uint32_t length) {
-  int code, extra_bits;
-  VP8LPrefixEncodeBits(length, &code, &extra_bits);
-  return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
-}
-
-static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
-                                          uint32_t distance) {
-  int code, extra_bits;
-  VP8LPrefixEncodeBits(distance, &code, &extra_bits);
-  return m->distance_[code] + extra_bits;
-}
-
-static void AddSingleLiteralWithCostModel(const uint32_t* const argb,
-                                          VP8LColorCache* const hashers,
-                                          const CostModel* const cost_model,
-                                          int idx, int use_color_cache,
-                                          double prev_cost, float* const cost,
-                                          uint16_t* const dist_array) {
-  double cost_val = prev_cost;
-  const uint32_t color = argb[0];
-  const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1;
-  if (ix >= 0) {
-    // use_color_cache is true and hashers contains color
-    const double mul0 = 0.68;
-    cost_val += GetCacheCost(cost_model, ix) * mul0;
-  } else {
-    const double mul1 = 0.82;
-    if (use_color_cache) VP8LColorCacheInsert(hashers, color);
-    cost_val += GetLiteralCost(cost_model, color) * mul1;
-  }
-  if (cost[idx] > cost_val) {
-    cost[idx] = (float)cost_val;
-    dist_array[idx] = 1;  // only one is inserted.
-  }
-}
-
-// -----------------------------------------------------------------------------
-// CostManager and interval handling
-
-// Empirical value to avoid high memory consumption but good for performance.
-#define COST_CACHE_INTERVAL_SIZE_MAX 100
-
-// To perform backward reference every pixel at index index_ is considered and
-// the cost for the MAX_LENGTH following pixels computed. Those following pixels
-// at index index_ + k (k from 0 to MAX_LENGTH) have a cost of:
-//     distance_cost_ at index_ + GetLengthCost(cost_model, k)
-//            (named cost)            (named cached cost)
-// and the minimum value is kept. GetLengthCost(cost_model, k) is cached in an
-// array of size MAX_LENGTH.
-// Instead of performing MAX_LENGTH comparisons per pixel, we keep track of the
-// minimal values using intervals, for which lower_ and upper_ bounds are kept.
-// An interval is defined by the index_ of the pixel that generated it and
-// is only useful in a range of indices from start_ to end_ (exclusive), i.e.
-// it contains the minimum value for pixels between start_ and end_.
-// Intervals are stored in a linked list and ordered by start_. When a new
-// interval has a better minimum, old intervals are split or removed.
-typedef struct CostInterval CostInterval;
-struct CostInterval {
-  double lower_;
-  double upper_;
-  int start_;
-  int end_;
-  double distance_cost_;
-  int index_;
-  CostInterval* previous_;
-  CostInterval* next_;
-};
-
-// The GetLengthCost(cost_model, k) part of the costs is also bounded for
-// efficiency in a set of intervals of a different type.
-// If those intervals are small enough, they are not used for comparison and
-// written into the costs right away.
-typedef struct {
-  double lower_;  // Lower bound of the interval.
-  double upper_;  // Upper bound of the interval.
-  int start_;
-  int end_;       // Exclusive.
-  int do_write_;  // If !=0, the interval is saved to cost instead of being kept
-                  // for comparison.
-} CostCacheInterval;
-
-// This structure is in charge of managing intervals and costs.
-// It caches the different CostCacheInterval, caches the different
-// GetLengthCost(cost_model, k) in cost_cache_ and the CostInterval's (whose
-// count_ is limited by COST_CACHE_INTERVAL_SIZE_MAX).
-#define COST_MANAGER_MAX_FREE_LIST 10
-typedef struct {
-  CostInterval* head_;
-  int count_;  // The number of stored intervals.
-  CostCacheInterval* cache_intervals_;
-  size_t cache_intervals_size_;
-  double cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k).
-  double min_cost_cache_;          // The minimum value in cost_cache_[1:].
-  double max_cost_cache_;          // The maximum value in cost_cache_[1:].
-  float* costs_;
-  uint16_t* dist_array_;
-  // Most of the time, we only need few intervals -> use a free-list, to avoid
-  // fragmentation with small allocs in most common cases.
-  CostInterval intervals_[COST_MANAGER_MAX_FREE_LIST];
-  CostInterval* free_intervals_;
-  // These are regularly malloc'd remains. This list can't grow larger than than
-  // size COST_CACHE_INTERVAL_SIZE_MAX - COST_MANAGER_MAX_FREE_LIST, note.
-  CostInterval* recycled_intervals_;
-  // Buffer used in BackwardReferencesHashChainDistanceOnly to store the ends
-  // of the intervals that can have impacted the cost at a pixel.
-  int* interval_ends_;
-  int interval_ends_size_;
-} CostManager;
-
-static int IsCostCacheIntervalWritable(int start, int end) {
-  // 100 is the length for which we consider an interval for comparison, and not
-  // for writing.
-  // The first intervals are very small and go in increasing size. This constant
-  // helps merging them into one big interval (up to index 150/200 usually from
-  // which intervals start getting much bigger).
-  // This value is empirical.
-  return (end - start + 1 < 100);
-}
-
-static void CostIntervalAddToFreeList(CostManager* const manager,
-                                      CostInterval* const interval) {
-  interval->next_ = manager->free_intervals_;
-  manager->free_intervals_ = interval;
-}
-
-static int CostIntervalIsInFreeList(const CostManager* const manager,
-                                    const CostInterval* const interval) {
-  return (interval >= &manager->intervals_[0] &&
-          interval <= &manager->intervals_[COST_MANAGER_MAX_FREE_LIST - 1]);
-}
-
-static void CostManagerInitFreeList(CostManager* const manager) {
-  int i;
-  manager->free_intervals_ = NULL;
-  for (i = 0; i < COST_MANAGER_MAX_FREE_LIST; ++i) {
-    CostIntervalAddToFreeList(manager, &manager->intervals_[i]);
-  }
-}
-
-static void DeleteIntervalList(CostManager* const manager,
-                               const CostInterval* interval) {
-  while (interval != NULL) {
-    const CostInterval* const next = interval->next_;
-    if (!CostIntervalIsInFreeList(manager, interval)) {
-      WebPSafeFree((void*)interval);
-    }  // else: do nothing
-    interval = next;
-  }
-}
-
-static void CostManagerClear(CostManager* const manager) {
-  if (manager == NULL) return;
-
-  WebPSafeFree(manager->costs_);
-  WebPSafeFree(manager->cache_intervals_);
-  WebPSafeFree(manager->interval_ends_);
-
-  // Clear the interval lists.
-  DeleteIntervalList(manager, manager->head_);
-  manager->head_ = NULL;
-  DeleteIntervalList(manager, manager->recycled_intervals_);
-  manager->recycled_intervals_ = NULL;
-
-  // Reset pointers, count_ and cache_intervals_size_.
-  memset(manager, 0, sizeof(*manager));
-  CostManagerInitFreeList(manager);
-}
-
-static int CostManagerInit(CostManager* const manager,
-                           uint16_t* const dist_array, int pix_count,
-                           const CostModel* const cost_model) {
-  int i;
-  const int cost_cache_size = (pix_count > MAX_LENGTH) ? MAX_LENGTH : pix_count;
-  // This constant is tied to the cost_model we use.
-  // Empirically, differences between intervals is usually of more than 1.
-  const double min_cost_diff = 0.1;
-
-  manager->costs_ = NULL;
-  manager->cache_intervals_ = NULL;
-  manager->interval_ends_ = NULL;
-  manager->head_ = NULL;
-  manager->recycled_intervals_ = NULL;
-  manager->count_ = 0;
-  manager->dist_array_ = dist_array;
-  CostManagerInitFreeList(manager);
-
-  // Fill in the cost_cache_.
-  manager->cache_intervals_size_ = 1;
-  manager->cost_cache_[0] = 0;
-  for (i = 1; i < cost_cache_size; ++i) {
-    manager->cost_cache_[i] = GetLengthCost(cost_model, i);
-    // Get an approximation of the number of bound intervals.
-    if (fabs(manager->cost_cache_[i] - manager->cost_cache_[i - 1]) >
-        min_cost_diff) {
-      ++manager->cache_intervals_size_;
-    }
-    // Compute the minimum of cost_cache_.
-    if (i == 1) {
-      manager->min_cost_cache_ = manager->cost_cache_[1];
-      manager->max_cost_cache_ = manager->cost_cache_[1];
-    } else if (manager->cost_cache_[i] < manager->min_cost_cache_) {
-      manager->min_cost_cache_ = manager->cost_cache_[i];
-    } else if (manager->cost_cache_[i] > manager->max_cost_cache_) {
-      manager->max_cost_cache_ = manager->cost_cache_[i];
-    }
-  }
-
-  // With the current cost models, we have 15 intervals, so we are safe by
-  // setting a maximum of COST_CACHE_INTERVAL_SIZE_MAX.
-  if (manager->cache_intervals_size_ > COST_CACHE_INTERVAL_SIZE_MAX) {
-    manager->cache_intervals_size_ = COST_CACHE_INTERVAL_SIZE_MAX;
-  }
-  manager->cache_intervals_ = (CostCacheInterval*)WebPSafeMalloc(
-      manager->cache_intervals_size_, sizeof(*manager->cache_intervals_));
-  if (manager->cache_intervals_ == NULL) {
-    CostManagerClear(manager);
-    return 0;
-  }
-
-  // Fill in the cache_intervals_.
-  {
-    double cost_prev = -1e38f;  // unprobably low initial value
-    CostCacheInterval* prev = NULL;
-    CostCacheInterval* cur = manager->cache_intervals_;
-    const CostCacheInterval* const end =
-        manager->cache_intervals_ + manager->cache_intervals_size_;
-
-    // Consecutive values in cost_cache_ are compared and if a big enough
-    // difference is found, a new interval is created and bounded.
-    for (i = 0; i < cost_cache_size; ++i) {
-      const double cost_val = manager->cost_cache_[i];
-      if (i == 0 ||
-          (fabs(cost_val - cost_prev) > min_cost_diff && cur + 1 < end)) {
-        if (i > 1) {
-          const int is_writable =
-              IsCostCacheIntervalWritable(cur->start_, cur->end_);
-          // Merge with the previous interval if both are writable.
-          if (is_writable && cur != manager->cache_intervals_ &&
-              prev->do_write_) {
-            // Update the previous interval.
-            prev->end_ = cur->end_;
-            if (cur->lower_ < prev->lower_) {
-              prev->lower_ = cur->lower_;
-            } else if (cur->upper_ > prev->upper_) {
-              prev->upper_ = cur->upper_;
-            }
-          } else {
-            cur->do_write_ = is_writable;
-            prev = cur;
-            ++cur;
-          }
-        }
-        // Initialize an interval.
-        cur->start_ = i;
-        cur->do_write_ = 0;
-        cur->lower_ = cost_val;
-        cur->upper_ = cost_val;
-      } else {
-        // Update the current interval bounds.
-        if (cost_val < cur->lower_) {
-          cur->lower_ = cost_val;
-        } else if (cost_val > cur->upper_) {
-          cur->upper_ = cost_val;
-        }
-      }
-      cur->end_ = i + 1;
-      cost_prev = cost_val;
-    }
-    manager->cache_intervals_size_ = cur + 1 - manager->cache_intervals_;
-  }
-
-  manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
-  if (manager->costs_ == NULL) {
-    CostManagerClear(manager);
-    return 0;
-  }
-  // Set the initial costs_ high for every pixel as we will keep the minimum.
-  for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f;
-
-  // The cost at pixel is influenced by the cost intervals from previous pixels.
-  // Let us take the specific case where the offset is the same (which actually
-  // happens a lot in case of uniform regions).
-  // pixel i contributes to j>i a cost of: offset cost + cost_cache_[j-i]
-  // pixel i+1 contributes to j>i a cost of: 2*offset cost + cost_cache_[j-i-1]
-  // pixel i+2 contributes to j>i a cost of: 3*offset cost + cost_cache_[j-i-2]
-  // and so on.
-  // A pixel i influences the following length(j) < MAX_LENGTH pixels. What is
-  // the value of j such that pixel i + j cannot influence any of those pixels?
-  // This value is such that:
-  //               max of cost_cache_ < j*offset cost + min of cost_cache_
-  // (pixel i + j 's cost cannot beat the worst cost given by pixel i).
-  // This value will be used to optimize the cost computation in
-  // BackwardReferencesHashChainDistanceOnly.
-  {
-    // The offset cost is computed in GetDistanceCost and has a minimum value of
-    // the minimum in cost_model->distance_. The case where the offset cost is 0
-    // will be dealt with differently later so we are only interested in the
-    // minimum non-zero offset cost.
-    double offset_cost_min = 0.;
-    int size;
-    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
-      if (cost_model->distance_[i] != 0) {
-        if (offset_cost_min == 0.) {
-          offset_cost_min = cost_model->distance_[i];
-        } else if (cost_model->distance_[i] < offset_cost_min) {
-          offset_cost_min = cost_model->distance_[i];
-        }
-      }
-    }
-    // In case all the cost_model->distance_ is 0, the next non-zero cost we
-    // can have is from the extra bit in GetDistanceCost, hence 1.
-    if (offset_cost_min < 1.) offset_cost_min = 1.;
-
-    size = 1 + (int)ceil((manager->max_cost_cache_ - manager->min_cost_cache_) /
-                         offset_cost_min);
-    // Empirically, we usually end up with a value below 100.
-    if (size > MAX_LENGTH) size = MAX_LENGTH;
-
-    manager->interval_ends_ =
-        (int*)WebPSafeMalloc(size, sizeof(*manager->interval_ends_));
-    if (manager->interval_ends_ == NULL) {
-      CostManagerClear(manager);
-      return 0;
-    }
-    manager->interval_ends_size_ = size;
-  }
-
-  return 1;
-}
-
-// Given the distance_cost for pixel 'index', update the cost at pixel 'i' if it
-// is smaller than the previously computed value.
-static WEBP_INLINE void UpdateCost(CostManager* const manager, int i, int index,
-                                   double distance_cost) {
-  int k = i - index;
-  double cost_tmp;
-  assert(k >= 0 && k < MAX_LENGTH);
-  cost_tmp = distance_cost + manager->cost_cache_[k];
-
-  if (manager->costs_[i] > cost_tmp) {
-    manager->costs_[i] = (float)cost_tmp;
-    manager->dist_array_[i] = k + 1;
-  }
-}
-
-// Given the distance_cost for pixel 'index', update the cost for all the pixels
-// between 'start' and 'end' excluded.
-static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager,
-                                              int start, int end, int index,
-                                              double distance_cost) {
-  int i;
-  for (i = start; i < end; ++i) UpdateCost(manager, i, index, distance_cost);
-}
-
-// Given two intervals, make 'prev' be the previous one of 'next' in 'manager'.
-static WEBP_INLINE void ConnectIntervals(CostManager* const manager,
-                                         CostInterval* const prev,
-                                         CostInterval* const next) {
-  if (prev != NULL) {
-    prev->next_ = next;
-  } else {
-    manager->head_ = next;
-  }
-
-  if (next != NULL) next->previous_ = prev;
-}
-
-// Pop an interval in the manager.
-static WEBP_INLINE void PopInterval(CostManager* const manager,
-                                    CostInterval* const interval) {
-  CostInterval* const next = interval->next_;
-
-  if (interval == NULL) return;
-
-  ConnectIntervals(manager, interval->previous_, next);
-  if (CostIntervalIsInFreeList(manager, interval)) {
-    CostIntervalAddToFreeList(manager, interval);
-  } else {  // recycle regularly malloc'd intervals too
-    interval->next_ = manager->recycled_intervals_;
-    manager->recycled_intervals_ = interval;
-  }
-  --manager->count_;
-  assert(manager->count_ >= 0);
-}
-
-// Update the cost at index i by going over all the stored intervals that
-// overlap with i.
-static WEBP_INLINE void UpdateCostPerIndex(CostManager* const manager, int i) {
-  CostInterval* current = manager->head_;
-
-  while (current != NULL && current->start_ <= i) {
-    if (current->end_ <= i) {
-      // We have an outdated interval, remove it.
-      CostInterval* next = current->next_;
-      PopInterval(manager, current);
-      current = next;
-    } else {
-      UpdateCost(manager, i, current->index_, current->distance_cost_);
-      current = current->next_;
-    }
-  }
-}
-
-// Given a current orphan interval and its previous interval, before
-// it was orphaned (which can be NULL), set it at the right place in the list
-// of intervals using the start_ ordering and the previous interval as a hint.
-static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager,
-                                               CostInterval* const current,
-                                               CostInterval* previous) {
-  assert(current != NULL);
-
-  if (previous == NULL) previous = manager->head_;
-  while (previous != NULL && current->start_ < previous->start_) {
-    previous = previous->previous_;
-  }
-  while (previous != NULL && previous->next_ != NULL &&
-         previous->next_->start_ < current->start_) {
-    previous = previous->next_;
-  }
-
-  if (previous != NULL) {
-    ConnectIntervals(manager, current, previous->next_);
-  } else {
-    ConnectIntervals(manager, current, manager->head_);
-  }
-  ConnectIntervals(manager, previous, current);
-}
-
-// Insert an interval in the list contained in the manager by starting at
-// interval_in as a hint. The intervals are sorted by start_ value.
-static WEBP_INLINE void InsertInterval(CostManager* const manager,
-                                       CostInterval* const interval_in,
-                                       double distance_cost, double lower,
-                                       double upper, int index, int start,
-                                       int end) {
-  CostInterval* interval_new;
-
-  if (IsCostCacheIntervalWritable(start, end) ||
-      manager->count_ >= COST_CACHE_INTERVAL_SIZE_MAX) {
-    // Write down the interval if it is too small.
-    UpdateCostPerInterval(manager, start, end, index, distance_cost);
-    return;
-  }
-  if (manager->free_intervals_ != NULL) {
-    interval_new = manager->free_intervals_;
-    manager->free_intervals_ = interval_new->next_;
-  } else if (manager->recycled_intervals_ != NULL) {
-    interval_new = manager->recycled_intervals_;
-    manager->recycled_intervals_ = interval_new->next_;
-  } else {   // malloc for good
-    interval_new = (CostInterval*)WebPSafeMalloc(1, sizeof(*interval_new));
-    if (interval_new == NULL) {
-      // Write down the interval if we cannot create it.
-      UpdateCostPerInterval(manager, start, end, index, distance_cost);
-      return;
-    }
-  }
-
-  interval_new->distance_cost_ = distance_cost;
-  interval_new->lower_ = lower;
-  interval_new->upper_ = upper;
-  interval_new->index_ = index;
-  interval_new->start_ = start;
-  interval_new->end_ = end;
-  PositionOrphanInterval(manager, interval_new, interval_in);
-
-  ++manager->count_;
-}
-
-// When an interval has its start_ or end_ modified, it needs to be
-// repositioned in the linked list.
-static WEBP_INLINE void RepositionInterval(CostManager* const manager,
-                                           CostInterval* const interval) {
-  if (IsCostCacheIntervalWritable(interval->start_, interval->end_)) {
-    // Maybe interval has been resized and is small enough to be removed.
-    UpdateCostPerInterval(manager, interval->start_, interval->end_,
-                          interval->index_, interval->distance_cost_);
-    PopInterval(manager, interval);
-    return;
-  }
-
-  // Early exit if interval is at the right spot.
-  if ((interval->previous_ == NULL ||
-       interval->previous_->start_ <= interval->start_) &&
-      (interval->next_ == NULL ||
-       interval->start_ <= interval->next_->start_)) {
-    return;
-  }
-
-  ConnectIntervals(manager, interval->previous_, interval->next_);
-  PositionOrphanInterval(manager, interval, interval->previous_);
-}
-
-// Given a new cost interval defined by its start at index, its last value and
-// distance_cost, add its contributions to the previous intervals and costs.
-// If handling the interval or one of its subintervals becomes to heavy, its
-// contribution is added to the costs right away.
-static WEBP_INLINE void PushInterval(CostManager* const manager,
-                                     double distance_cost, int index,
-                                     int last) {
-  size_t i;
-  CostInterval* interval = manager->head_;
-  CostInterval* interval_next;
-  const CostCacheInterval* const cost_cache_intervals =
-      manager->cache_intervals_;
-
-  for (i = 0; i < manager->cache_intervals_size_ &&
-              cost_cache_intervals[i].start_ < last;
-       ++i) {
-    // Define the intersection of the ith interval with the new one.
-    int start = index + cost_cache_intervals[i].start_;
-    const int end = index + (cost_cache_intervals[i].end_ > last
-                                 ? last
-                                 : cost_cache_intervals[i].end_);
-    const double lower_in = cost_cache_intervals[i].lower_;
-    const double upper_in = cost_cache_intervals[i].upper_;
-    const double lower_full_in = distance_cost + lower_in;
-    const double upper_full_in = distance_cost + upper_in;
-
-    if (cost_cache_intervals[i].do_write_) {
-      UpdateCostPerInterval(manager, start, end, index, distance_cost);
-      continue;
-    }
-
-    for (; interval != NULL && interval->start_ < end && start < end;
-         interval = interval_next) {
-      const double lower_full_interval =
-          interval->distance_cost_ + interval->lower_;
-      const double upper_full_interval =
-          interval->distance_cost_ + interval->upper_;
-
-      interval_next = interval->next_;
-
-      // Make sure we have some overlap
-      if (start >= interval->end_) continue;
-
-      if (lower_full_in >= upper_full_interval) {
-        // When intervals are represented, the lower, the better.
-        // [**********************************************************]
-        // start                                                    end
-        //                   [----------------------------------]
-        //                   interval->start_       interval->end_
-        // If we are worse than what we already have, add whatever we have so
-        // far up to interval.
-        const int start_new = interval->end_;
-        InsertInterval(manager, interval, distance_cost, lower_in, upper_in,
-                       index, start, interval->start_);
-        start = start_new;
-        continue;
-      }
-
-      // We know the two intervals intersect.
-      if (upper_full_in >= lower_full_interval) {
-        // There is no clear cut on which is best, so let's keep both.
-        // [*********[*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*]***********]
-        // start     interval->start_     interval->end_         end
-        // OR
-        // [*********[*-*-*-*-*-*-*-*-*-*-*-]----------------------]
-        // start     interval->start_     end          interval->end_
-        const int end_new = (interval->end_ <= end) ? interval->end_ : end;
-        InsertInterval(manager, interval, distance_cost, lower_in, upper_in,
-                       index, start, end_new);
-        start = end_new;
-      } else if (start <= interval->start_ && interval->end_ <= end) {
-        //                   [----------------------------------]
-        //                   interval->start_       interval->end_
-        // [**************************************************************]
-        // start                                                        end
-        // We can safely remove the old interval as it is fully included.
-        PopInterval(manager, interval);
-      } else {
-        if (interval->start_ <= start && end <= interval->end_) {
-          // [--------------------------------------------------------------]
-          // interval->start_                                  interval->end_
-          //                     [*****************************]
-          //                     start                       end
-          // We have to split the old interval as it fully contains the new one.
-          const int end_original = interval->end_;
-          interval->end_ = start;
-          InsertInterval(manager, interval, interval->distance_cost_,
-                         interval->lower_, interval->upper_, interval->index_,
-                         end, end_original);
-        } else if (interval->start_ < start) {
-          // [------------------------------------]
-          // interval->start_        interval->end_
-          //                     [*****************************]
-          //                     start                       end
-          interval->end_ = start;
-        } else {
-          //              [------------------------------------]
-          //              interval->start_        interval->end_
-          // [*****************************]
-          // start                       end
-          interval->start_ = end;
-        }
-
-        // The interval has been modified, we need to reposition it or write it.
-        RepositionInterval(manager, interval);
-      }
-    }
-    // Insert the remaining interval from start to end.
-    InsertInterval(manager, interval, distance_cost, lower_in, upper_in, index,
-                   start, end);
-  }
-}
-
-static int BackwardReferencesHashChainDistanceOnly(
-    int xsize, int ysize, const uint32_t* const argb, int quality,
-    int cache_bits, const VP8LHashChain* const hash_chain,
-    VP8LBackwardRefs* const refs, uint16_t* const dist_array) {
-  int i;
-  int ok = 0;
-  int cc_init = 0;
-  const int pix_count = xsize * ysize;
-  const int use_color_cache = (cache_bits > 0);
-  const size_t literal_array_size = sizeof(double) *
-      (NUM_LITERAL_CODES + NUM_LENGTH_CODES +
-       ((cache_bits > 0) ? (1 << cache_bits) : 0));
-  const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
-  CostModel* const cost_model =
-      (CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
-  VP8LColorCache hashers;
-  const int skip_length = 32 + quality;
-  const int skip_min_distance_code = 2;
-  CostManager* cost_manager =
-      (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager));
-
-  if (cost_model == NULL || cost_manager == NULL) goto Error;
-
-  cost_model->literal_ = (double*)(cost_model + 1);
-  if (use_color_cache) {
-    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
-    if (!cc_init) goto Error;
-  }
-
-  if (!CostModelBuild(cost_model, cache_bits, refs)) {
-    goto Error;
-  }
-
-  if (!CostManagerInit(cost_manager, dist_array, pix_count, cost_model)) {
-    goto Error;
-  }
-
-  // We loop one pixel at a time, but store all currently best points to
-  // non-processed locations from this point.
-  dist_array[0] = 0;
-  // Add first pixel as literal.
-  AddSingleLiteralWithCostModel(argb + 0, &hashers, cost_model, 0,
-                                use_color_cache, 0.0, cost_manager->costs_,
-                                dist_array);
-
-  for (i = 1; i < pix_count - 1; ++i) {
-    int offset = 0, len = 0;
-    double prev_cost = cost_manager->costs_[i - 1];
-    HashChainFindCopy(hash_chain, i, &offset, &len);
-    if (len >= 2) {
-      // If we are dealing with a non-literal.
-      const int code = DistanceToPlaneCode(xsize, offset);
-      const double offset_cost = GetDistanceCost(cost_model, code);
-      const int first_i = i;
-      int j_max = 0, interval_ends_index = 0;
-      const int is_offset_zero = (offset_cost == 0.);
-
-      if (!is_offset_zero) {
-        j_max = (int)ceil(
-            (cost_manager->max_cost_cache_ - cost_manager->min_cost_cache_) /
-            offset_cost);
-        if (j_max < 1) {
-          j_max = 1;
-        } else if (j_max > cost_manager->interval_ends_size_ - 1) {
-          // This could only happen in the case of MAX_LENGTH.
-          j_max = cost_manager->interval_ends_size_ - 1;
-        }
-      }  // else j_max is unused anyway.
-
-      // Instead of considering all contributions from a pixel i by calling:
-      //         PushInterval(cost_manager, prev_cost + offset_cost, i, len);
-      // we optimize these contributions in case offset_cost stays the same for
-      // consecutive pixels. This describes a set of pixels similar to a
-      // previous set (e.g. constant color regions).
-      for (; i < pix_count - 1; ++i) {
-        int offset_next, len_next;
-        prev_cost = cost_manager->costs_[i - 1];
-
-        if (is_offset_zero) {
-          // No optimization can be made so we just push all of the
-          // contributions from i.
-          PushInterval(cost_manager, prev_cost, i, len);
-        } else {
-          // j_max is chosen as the smallest j such that:
-          //       max of cost_cache_ < j*offset cost + min of cost_cache_
-          // Therefore, the pixel influenced by i-j_max, cannot be influenced
-          // by i. Only the costs after the end of what i contributed need to be
-          // updated. cost_manager->interval_ends_ is a circular buffer that
-          // stores those ends.
-          const double distance_cost = prev_cost + offset_cost;
-          int j = cost_manager->interval_ends_[interval_ends_index];
-          if (i - first_i <= j_max ||
-              !IsCostCacheIntervalWritable(j, i + len)) {
-            PushInterval(cost_manager, distance_cost, i, len);
-          } else {
-            for (; j < i + len; ++j) {
-              UpdateCost(cost_manager, j, i, distance_cost);
-            }
-          }
-          // Store the new end in the circular buffer.
-          assert(interval_ends_index < cost_manager->interval_ends_size_);
-          cost_manager->interval_ends_[interval_ends_index] = i + len;
-          if (++interval_ends_index > j_max) interval_ends_index = 0;
-        }
-
-        // Check whether i is the last pixel to consider, as it is handled
-        // differently.
-        if (i + 1 >= pix_count - 1) break;
-        HashChainFindCopy(hash_chain, i + 1, &offset_next, &len_next);
-        if (offset_next != offset) break;
-        len = len_next;
-        UpdateCostPerIndex(cost_manager, i);
-        AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i,
-                                      use_color_cache, prev_cost,
-                                      cost_manager->costs_, dist_array);
-      }
-      // Submit the last pixel.
-      UpdateCostPerIndex(cost_manager, i + 1);
-
-      // This if is for speedup only. It roughly doubles the speed, and
-      // makes compression worse by .1 %.
-      if (len >= skip_length && code <= skip_min_distance_code) {
-        // Long copy for short distances, let's skip the middle
-        // lookups for better copies.
-        // 1) insert the hashes.
-        if (use_color_cache) {
-          int k;
-          for (k = 0; k < len; ++k) {
-            VP8LColorCacheInsert(&hashers, argb[i + k]);
-          }
-        }
-        // 2) jump.
-        {
-          const int i_next = i + len - 1;  // for loop does ++i, thus -1 here.
-          for (; i <= i_next; ++i) UpdateCostPerIndex(cost_manager, i + 1);
-          i = i_next;
-        }
-        goto next_symbol;
-      }
-      if (len > 2) {
-        // Also try the smallest interval possible (size 2).
-        double cost_total =
-            prev_cost + offset_cost + GetLengthCost(cost_model, 1);
-        if (cost_manager->costs_[i + 1] > cost_total) {
-          cost_manager->costs_[i + 1] = (float)cost_total;
-          dist_array[i + 1] = 2;
-        }
-      }
-    } else {
-      // The pixel is added as a single literal so just update the costs.
-      UpdateCostPerIndex(cost_manager, i + 1);
-    }
-
-    AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i,
-                                  use_color_cache, prev_cost,
-                                  cost_manager->costs_, dist_array);
-
- next_symbol: ;
-  }
-  // Handle the last pixel.
-  if (i == (pix_count - 1)) {
-    AddSingleLiteralWithCostModel(
-        argb + i, &hashers, cost_model, i, use_color_cache,
-        cost_manager->costs_[pix_count - 2], cost_manager->costs_, dist_array);
-  }
-
-  ok = !refs->error_;
- Error:
-  if (cc_init) VP8LColorCacheClear(&hashers);
-  CostManagerClear(cost_manager);
-  WebPSafeFree(cost_model);
-  WebPSafeFree(cost_manager);
-  return ok;
-}
-
-// We pack the path at the end of *dist_array and return
-// a pointer to this part of the array. Example:
-// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
-static void TraceBackwards(uint16_t* const dist_array,
-                           int dist_array_size,
-                           uint16_t** const chosen_path,
-                           int* const chosen_path_size) {
-  uint16_t* path = dist_array + dist_array_size;
-  uint16_t* cur = dist_array + dist_array_size - 1;
-  while (cur >= dist_array) {
-    const int k = *cur;
-    --path;
-    *path = k;
-    cur -= k;
-  }
-  *chosen_path = path;
-  *chosen_path_size = (int)(dist_array + dist_array_size - path);
-}
-
-static int BackwardReferencesHashChainFollowChosenPath(
-    const uint32_t* const argb, int cache_bits,
-    const uint16_t* const chosen_path, int chosen_path_size,
-    const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) {
-  const int use_color_cache = (cache_bits > 0);
-  int ix;
-  int i = 0;
-  int ok = 0;
-  int cc_init = 0;
-  VP8LColorCache hashers;
-
-  if (use_color_cache) {
-    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
-    if (!cc_init) goto Error;
-  }
-
-  ClearBackwardRefs(refs);
-  for (ix = 0; ix < chosen_path_size; ++ix) {
-    const int len = chosen_path[ix];
-    if (len != 1) {
-      int k;
-      const int offset = HashChainFindOffset(hash_chain, i);
-      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
-      if (use_color_cache) {
-        for (k = 0; k < len; ++k) {
-          VP8LColorCacheInsert(&hashers, argb[i + k]);
-        }
-      }
-      i += len;
-    } else {
-      PixOrCopy v;
-      const int idx =
-          use_color_cache ? VP8LColorCacheContains(&hashers, argb[i]) : -1;
-      if (idx >= 0) {
-        // use_color_cache is true and hashers contains argb[i]
-        // push pixel as a color cache index
-        v = PixOrCopyCreateCacheIdx(idx);
-      } else {
-        if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
-        v = PixOrCopyCreateLiteral(argb[i]);
-      }
-      BackwardRefsCursorAdd(refs, v);
-      ++i;
-    }
-  }
-  ok = !refs->error_;
- Error:
-  if (cc_init) VP8LColorCacheClear(&hashers);
-  return ok;
-}
-
-// Returns 1 on success.
-static int BackwardReferencesTraceBackwards(
-    int xsize, int ysize, const uint32_t* const argb, int quality,
-    int cache_bits, const VP8LHashChain* const hash_chain,
-    VP8LBackwardRefs* const refs) {
-  int ok = 0;
-  const int dist_array_size = xsize * ysize;
-  uint16_t* chosen_path = NULL;
-  int chosen_path_size = 0;
-  uint16_t* dist_array =
-      (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));
-
-  if (dist_array == NULL) goto Error;
-
-  if (!BackwardReferencesHashChainDistanceOnly(
-      xsize, ysize, argb, quality, cache_bits, hash_chain,
-      refs, dist_array)) {
-    goto Error;
-  }
-  TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size);
-  if (!BackwardReferencesHashChainFollowChosenPath(
-          argb, cache_bits, chosen_path, chosen_path_size, hash_chain, refs)) {
-    goto Error;
-  }
-  ok = 1;
- Error:
-  WebPSafeFree(dist_array);
-  return ok;
-}
-
-static void BackwardReferences2DLocality(int xsize,
-                                         const VP8LBackwardRefs* const refs) {
-  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
-  while (VP8LRefsCursorOk(&c)) {
-    if (PixOrCopyIsCopy(c.cur_pos)) {
-      const int dist = c.cur_pos->argb_or_distance;
-      const int transformed_dist = DistanceToPlaneCode(xsize, dist);
-      c.cur_pos->argb_or_distance = transformed_dist;
-    }
-    VP8LRefsCursorNext(&c);
-  }
-}
-
-// Computes the entropies for a color cache size (in bits) between 0 (unused)
-// and cache_bits_max (inclusive).
-// Returns 1 on success, 0 in case of allocation error.
-static int ComputeCacheEntropies(const uint32_t* argb,
-                                 const VP8LBackwardRefs* const refs,
-                                 int cache_bits_max, double entropies[]) {
-  int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
-  VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
-  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
-  VP8LHistogram* histos[MAX_COLOR_CACHE_BITS + 1] = { NULL };
-  int ok = 0;
-  int i;
-
-  for (i = 0; i <= cache_bits_max; ++i) {
-    histos[i] = VP8LAllocateHistogram(i);
-    if (histos[i] == NULL) goto Error;
-    if (i == 0) continue;
-    cc_init[i] = VP8LColorCacheInit(&hashers[i], i);
-    if (!cc_init[i]) goto Error;
-  }
-
-  assert(cache_bits_max >= 0);
-  // Do not use the color cache for cache_bits=0.
-  while (VP8LRefsCursorOk(&c)) {
-    VP8LHistogramAddSinglePixOrCopy(histos[0], c.cur_pos);
-    VP8LRefsCursorNext(&c);
-  }
-  if (cache_bits_max > 0) {
-    c = VP8LRefsCursorInit(refs);
-    while (VP8LRefsCursorOk(&c)) {
-      const PixOrCopy* const v = c.cur_pos;
-      if (PixOrCopyIsLiteral(v)) {
-        const uint32_t pix = *argb++;
-        // The keys of the caches can be derived from the longest one.
-        int key = HashPix(pix, 32 - cache_bits_max);
-        for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
-          if (VP8LColorCacheLookup(&hashers[i], key) == pix) {
-            ++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
-          } else {
-            VP8LColorCacheSet(&hashers[i], key, pix);
-            ++histos[i]->blue_[pix & 0xff];
-            ++histos[i]->literal_[(pix >> 8) & 0xff];
-            ++histos[i]->red_[(pix >> 16) & 0xff];
-            ++histos[i]->alpha_[pix >> 24];
-          }
-        }
-      } else {
-        // Update the histograms for distance/length.
-        int len = PixOrCopyLength(v);
-        int code_dist, code_len, extra_bits;
-        uint32_t argb_prev = *argb ^ 0xffffffffu;
-        VP8LPrefixEncodeBits(len, &code_len, &extra_bits);
-        VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code_dist, &extra_bits);
-        for (i = 1; i <= cache_bits_max; ++i) {
-          ++histos[i]->literal_[NUM_LITERAL_CODES + code_len];
-          ++histos[i]->distance_[code_dist];
-        }
-        // Update the colors caches.
-        do {
-          if (*argb != argb_prev) {
-            // Efficiency: insert only if the color changes.
-            int key = HashPix(*argb, 32 - cache_bits_max);
-            for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
-              hashers[i].colors_[key] = *argb;
-            }
-            argb_prev = *argb;
-          }
-          argb++;
-        } while (--len != 0);
-      }
-      VP8LRefsCursorNext(&c);
-    }
-  }
-  for (i = 0; i <= cache_bits_max; ++i) {
-    entropies[i] = VP8LHistogramEstimateBits(histos[i]);
-  }
-  ok = 1;
-Error:
-  for (i = 0; i <= cache_bits_max; ++i) {
-    if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
-    VP8LFreeHistogram(histos[i]);
-  }
-  return ok;
-}
-
-// Evaluate optimal cache bits for the local color cache.
-// The input *best_cache_bits sets the maximum cache bits to use (passing 0
-// implies disabling the local color cache). The local color cache is also
-// disabled for the lower (<= 25) quality.
-// Returns 0 in case of memory error.
-static int CalculateBestCacheSize(const uint32_t* const argb,
-                                  int xsize, int ysize, int quality,
-                                  const VP8LHashChain* const hash_chain,
-                                  VP8LBackwardRefs* const refs,
-                                  int* const lz77_computed,
-                                  int* const best_cache_bits) {
-  int i;
-  int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits;
-  double entropy_min = MAX_ENTROPY;
-  double entropies[MAX_COLOR_CACHE_BITS + 1];
-
-  assert(cache_bits_high <= MAX_COLOR_CACHE_BITS);
-
-  *lz77_computed = 0;
-  if (cache_bits_high == 0) {
-    *best_cache_bits = 0;
-    // Local color cache is disabled.
-    return 1;
-  }
-  // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color cache
-  // is not that different in practice.
-  if (!BackwardReferencesLz77(xsize, ysize, argb, 0, hash_chain, refs)) {
-    return 0;
-  }
-  // Find the cache_bits giving the lowest entropy. The search is done in a
-  // brute-force way as the function (entropy w.r.t cache_bits) can be
-  // anything in practice.
-  if (!ComputeCacheEntropies(argb, refs, cache_bits_high, entropies)) {
-    return 0;
-  }
-  for (i = 0; i <= cache_bits_high; ++i) {
-    if (i == 0 || entropies[i] < entropy_min) {
-      entropy_min = entropies[i];
-      *best_cache_bits = i;
-    }
-  }
-  return 1;
-}
-
-// Update (in-place) backward references for specified cache_bits.
-static int BackwardRefsWithLocalCache(const uint32_t* const argb,
-                                      int cache_bits,
-                                      VP8LBackwardRefs* const refs) {
-  int pixel_index = 0;
-  VP8LColorCache hashers;
-  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
-  if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0;
-
-  while (VP8LRefsCursorOk(&c)) {
-    PixOrCopy* const v = c.cur_pos;
-    if (PixOrCopyIsLiteral(v)) {
-      const uint32_t argb_literal = v->argb_or_distance;
-      const int ix = VP8LColorCacheContains(&hashers, argb_literal);
-      if (ix >= 0) {
-        // hashers contains argb_literal
-        *v = PixOrCopyCreateCacheIdx(ix);
-      } else {
-        VP8LColorCacheInsert(&hashers, argb_literal);
-      }
-      ++pixel_index;
-    } else {
-      // refs was created without local cache, so it can not have cache indexes.
-      int k;
-      assert(PixOrCopyIsCopy(v));
-      for (k = 0; k < v->len; ++k) {
-        VP8LColorCacheInsert(&hashers, argb[pixel_index++]);
-      }
-    }
-    VP8LRefsCursorNext(&c);
-  }
-  VP8LColorCacheClear(&hashers);
-  return 1;
-}
-
-static VP8LBackwardRefs* GetBackwardReferencesLowEffort(
-    int width, int height, const uint32_t* const argb,
-    int* const cache_bits, const VP8LHashChain* const hash_chain,
-    VP8LBackwardRefs refs_array[2]) {
-  VP8LBackwardRefs* refs_lz77 = &refs_array[0];
-  *cache_bits = 0;
-  if (!BackwardReferencesLz77(width, height, argb, 0, hash_chain, refs_lz77)) {
-    return NULL;
-  }
-  BackwardReferences2DLocality(width, refs_lz77);
-  return refs_lz77;
-}
-
-static VP8LBackwardRefs* GetBackwardReferences(
-    int width, int height, const uint32_t* const argb, int quality,
-    int* const cache_bits, const VP8LHashChain* const hash_chain,
-    VP8LBackwardRefs refs_array[2]) {
-  int lz77_is_useful;
-  int lz77_computed;
-  double bit_cost_lz77, bit_cost_rle;
-  VP8LBackwardRefs* best = NULL;
-  VP8LBackwardRefs* refs_lz77 = &refs_array[0];
-  VP8LBackwardRefs* refs_rle = &refs_array[1];
-  VP8LHistogram* histo = NULL;
-
-  if (!CalculateBestCacheSize(argb, width, height, quality, hash_chain,
-                              refs_lz77, &lz77_computed, cache_bits)) {
-    goto Error;
-  }
-
-  if (lz77_computed) {
-    // Transform refs_lz77 for the optimized cache_bits.
-    if (*cache_bits > 0) {
-      if (!BackwardRefsWithLocalCache(argb, *cache_bits, refs_lz77)) {
-        goto Error;
-      }
-    }
-  } else {
-    if (!BackwardReferencesLz77(width, height, argb, *cache_bits, hash_chain,
-                                refs_lz77)) {
-      goto Error;
-    }
-  }
-
-  if (!BackwardReferencesRle(width, height, argb, *cache_bits, refs_rle)) {
-    goto Error;
-  }
-
-  histo = VP8LAllocateHistogram(*cache_bits);
-  if (histo == NULL) goto Error;
-
-  {
-    // Evaluate LZ77 coding.
-    VP8LHistogramCreate(histo, refs_lz77, *cache_bits);
-    bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
-    // Evaluate RLE coding.
-    VP8LHistogramCreate(histo, refs_rle, *cache_bits);
-    bit_cost_rle = VP8LHistogramEstimateBits(histo);
-    // Decide if LZ77 is useful.
-    lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
-  }
-
-  // Choose appropriate backward reference.
-  if (lz77_is_useful) {
-    // TraceBackwards is costly. Don't execute it at lower quality.
-    const int try_lz77_trace_backwards = (quality >= 25);
-    best = refs_lz77;   // default guess: lz77 is better
-    if (try_lz77_trace_backwards) {
-      VP8LBackwardRefs* const refs_trace = refs_rle;
-      if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) {
-        best = NULL;
-        goto Error;
-      }
-      if (BackwardReferencesTraceBackwards(width, height, argb, quality,
-                                           *cache_bits, hash_chain,
-                                           refs_trace)) {
-        double bit_cost_trace;
-        // Evaluate LZ77 coding.
-        VP8LHistogramCreate(histo, refs_trace, *cache_bits);
-        bit_cost_trace = VP8LHistogramEstimateBits(histo);
-        if (bit_cost_trace < bit_cost_lz77) {
-          best = refs_trace;
-        }
-      }
-    }
-  } else {
-    best = refs_rle;
-  }
-
-  BackwardReferences2DLocality(width, best);
-
- Error:
-  VP8LFreeHistogram(histo);
-  return best;
-}
-
-VP8LBackwardRefs* VP8LGetBackwardReferences(
-    int width, int height, const uint32_t* const argb, int quality,
-    int low_effort, int* const cache_bits,
-    const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) {
-  if (low_effort) {
-    return GetBackwardReferencesLowEffort(width, height, argb, cache_bits,
-                                          hash_chain, refs_array);
-  } else {
-    return GetBackwardReferences(width, height, argb, quality, cache_bits,
-                                 hash_chain, refs_array);
-  }
-}
diff --git a/thirdparty/libwebp/dec/alpha_dec.c b/thirdparty/libwebp/src/dec/alpha_dec.c
index 83ffd4b609..bce735bfc2 100644
--- a/thirdparty/libwebp/dec/alpha_dec.c
+++ b/thirdparty/libwebp/src/dec/alpha_dec.c
@@ -12,13 +12,13 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <stdlib.h>
-#include "./alphai_dec.h"
-#include "./vp8i_dec.h"
-#include "./vp8li_dec.h"
-#include "../dsp/dsp.h"
-#include "../utils/quant_levels_dec_utils.h"
-#include "../utils/utils.h"
-#include "../webp/format_constants.h"
+#include "src/dec/alphai_dec.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/dec/vp8li_dec.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/quant_levels_dec_utils.h"
+#include "src/utils/utils.h"
+#include "src/webp/format_constants.h"
 
 //------------------------------------------------------------------------------
 // ALPHDecoder object.
diff --git a/thirdparty/libwebp/dec/alphai_dec.h b/thirdparty/libwebp/src/dec/alphai_dec.h
index 561e8151ee..e0fa281a55 100644
--- a/thirdparty/libwebp/dec/alphai_dec.h
+++ b/thirdparty/libwebp/src/dec/alphai_dec.h
@@ -11,11 +11,11 @@
 //
 // Author: Urvang (urvang@google.com)
 
-#ifndef WEBP_DEC_ALPHAI_H_
-#define WEBP_DEC_ALPHAI_H_
+#ifndef WEBP_DEC_ALPHAI_DEC_H_
+#define WEBP_DEC_ALPHAI_DEC_H_
 
-#include "./webpi_dec.h"
-#include "../utils/filters_utils.h"
+#include "src/dec/webpi_dec.h"
+#include "src/utils/filters_utils.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -51,4 +51,4 @@ void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_DEC_ALPHAI_H_ */
+#endif  /* WEBP_DEC_ALPHAI_DEC_H_ */
diff --git a/thirdparty/libwebp/dec/buffer_dec.c b/thirdparty/libwebp/src/dec/buffer_dec.c
index c685fd5646..75eb3c40b4 100644
--- a/thirdparty/libwebp/dec/buffer_dec.c
+++ b/thirdparty/libwebp/src/dec/buffer_dec.c
@@ -13,15 +13,15 @@
 
 #include <stdlib.h>
 
-#include "./vp8i_dec.h"
-#include "./webpi_dec.h"
-#include "../utils/utils.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/dec/webpi_dec.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // WebPDecBuffer
 
 // Number of bytes per pixel for the different color-spaces.
-static const int kModeBpp[MODE_LAST] = {
+static const uint8_t kModeBpp[MODE_LAST] = {
   3, 4, 3, 4, 4, 2, 2,
   4, 4, 4, 2,    // pre-multiplied modes
   1, 1 };
@@ -36,7 +36,7 @@ static int IsValidColorspace(int webp_csp_mode) {
 // strictly speaking, the very last (or first, if flipped) row
 // doesn't require padding.
 #define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE)       \
-    (uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH)
+    ((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
 
 static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
   int ok = 1;
@@ -98,9 +98,14 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
     uint64_t uv_size = 0, a_size = 0, total_size;
     // We need memory and it hasn't been allocated yet.
     // => initialize output buffer, now that dimensions are known.
-    const int stride = w * kModeBpp[mode];
-    const uint64_t size = (uint64_t)stride * h;
+    int stride;
+    uint64_t size;
 
+    if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) {
+      return VP8_STATUS_INVALID_PARAM;
+    }
+    stride = w * kModeBpp[mode];
+    size = (uint64_t)stride * h;
     if (!WebPIsRGBMode(mode)) {
       uv_stride = (w + 1) / 2;
       uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
@@ -169,11 +174,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
   return VP8_STATUS_OK;
 }
 
-VP8StatusCode WebPAllocateDecBuffer(int w, int h,
+VP8StatusCode WebPAllocateDecBuffer(int width, int height,
                                     const WebPDecoderOptions* const options,
-                                    WebPDecBuffer* const out) {
+                                    WebPDecBuffer* const buffer) {
   VP8StatusCode status;
-  if (out == NULL || w <= 0 || h <= 0) {
+  if (buffer == NULL || width <= 0 || height <= 0) {
     return VP8_STATUS_INVALID_PARAM;
   }
   if (options != NULL) {    // First, apply options if there is any.
@@ -182,33 +187,39 @@ VP8StatusCode WebPAllocateDecBuffer(int w, int h,
       const int ch = options->crop_height;
       const int x = options->crop_left & ~1;
       const int y = options->crop_top & ~1;
-      if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || x + cw > w || y + ch > h) {
+      if (x < 0 || y < 0 || cw <= 0 || ch <= 0 ||
+          x + cw > width || y + ch > height) {
         return VP8_STATUS_INVALID_PARAM;   // out of frame boundary.
       }
-      w = cw;
-      h = ch;
+      width = cw;
+      height = ch;
     }
+
     if (options->use_scaling) {
+#if !defined(WEBP_REDUCE_SIZE)
       int scaled_width = options->scaled_width;
       int scaled_height = options->scaled_height;
       if (!WebPRescalerGetScaledDimensions(
-              w, h, &scaled_width, &scaled_height)) {
+              width, height, &scaled_width, &scaled_height)) {
         return VP8_STATUS_INVALID_PARAM;
       }
-      w = scaled_width;
-      h = scaled_height;
+      width = scaled_width;
+      height = scaled_height;
+#else
+      return VP8_STATUS_INVALID_PARAM;   // rescaling not supported
+#endif
     }
   }
-  out->width = w;
-  out->height = h;
+  buffer->width = width;
+  buffer->height = height;
 
   // Then, allocate buffer for real.
-  status = AllocateBuffer(out);
+  status = AllocateBuffer(buffer);
   if (status != VP8_STATUS_OK) return status;
 
   // Use the stride trick if vertical flip is needed.
   if (options != NULL && options->flip) {
-    status = WebPFlipBuffer(out);
+    status = WebPFlipBuffer(buffer);
   }
   return status;
 }
diff --git a/thirdparty/libwebp/dec/common_dec.h b/thirdparty/libwebp/src/dec/common_dec.h
index 6961e22470..9995f1a51a 100644
--- a/thirdparty/libwebp/dec/common_dec.h
+++ b/thirdparty/libwebp/src/dec/common_dec.h
@@ -11,8 +11,8 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_DEC_COMMON_H_
-#define WEBP_DEC_COMMON_H_
+#ifndef WEBP_DEC_COMMON_DEC_H_
+#define WEBP_DEC_COMMON_DEC_H_
 
 // intra prediction modes
 enum { B_DC_PRED = 0,   // 4x4 modes
@@ -51,4 +51,4 @@ enum { MB_FEATURE_TREE_PROBS = 3,
        NUM_PROBAS = 11
      };
 
-#endif    // WEBP_DEC_COMMON_H_
+#endif    // WEBP_DEC_COMMON_DEC_H_
diff --git a/thirdparty/libwebp/dec/frame_dec.c b/thirdparty/libwebp/src/dec/frame_dec.c
index f91e27f7c8..517d0f5850 100644
--- a/thirdparty/libwebp/dec/frame_dec.c
+++ b/thirdparty/libwebp/src/dec/frame_dec.c
@@ -12,13 +12,13 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <stdlib.h>
-#include "./vp8i_dec.h"
-#include "../utils/utils.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // Main reconstruction function.
 
-static const int kScan[16] = {
+static const uint16_t kScan[16] = {
   0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
   0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
   0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
@@ -320,7 +320,7 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
 #define MIN_DITHER_AMP 4
 
 #define DITHER_AMP_TAB_SIZE 12
-static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
+static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
   // roughly, it's dqm->uv_mat_[1]
   8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
 };
@@ -728,7 +728,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
   }
 
   mem = (uint8_t*)dec->mem_;
-  dec->intra_t_ = (uint8_t*)mem;
+  dec->intra_t_ = mem;
   mem += intra_pred_mode_size;
 
   dec->yuv_t_ = (VP8TopSamples*)mem;
@@ -750,7 +750,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
 
   mem = (uint8_t*)WEBP_ALIGN(mem);
   assert((yuv_size & WEBP_ALIGN_CST) == 0);
-  dec->yuv_b_ = (uint8_t*)mem;
+  dec->yuv_b_ = mem;
   mem += yuv_size;
 
   dec->mb_data_ = (VP8MBData*)mem;
@@ -766,7 +766,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
     const int extra_rows = kFilterExtraRows[dec->filter_type_];
     const int extra_y = extra_rows * dec->cache_y_stride_;
     const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
-    dec->cache_y_ = ((uint8_t*)mem) + extra_y;
+    dec->cache_y_ = mem + extra_y;
     dec->cache_u_ = dec->cache_y_
                   + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
     dec->cache_v_ = dec->cache_u_
@@ -776,7 +776,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
   mem += cache_size;
 
   // alpha plane
-  dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
+  dec->alpha_plane_ = alpha_size ? mem : NULL;
   mem += alpha_size;
   assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
 
diff --git a/thirdparty/libwebp/dec/idec_dec.c b/thirdparty/libwebp/src/dec/idec_dec.c
index 78fb2e7186..a371ed7500 100644
--- a/thirdparty/libwebp/dec/idec_dec.c
+++ b/thirdparty/libwebp/src/dec/idec_dec.c
@@ -15,10 +15,10 @@
 #include <string.h>
 #include <stdlib.h>
 
-#include "./alphai_dec.h"
-#include "./webpi_dec.h"
-#include "./vp8i_dec.h"
-#include "../utils/utils.h"
+#include "src/dec/alphai_dec.h"
+#include "src/dec/webpi_dec.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/utils/utils.h"
 
 // In append mode, buffer allocations increase as multiples of this value.
 // Needs to be a power of 2.
@@ -673,12 +673,12 @@ void WebPIDelete(WebPIDecoder* idec) {
 //------------------------------------------------------------------------------
 // Wrapper toward WebPINewDecoder
 
-WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
+WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
                           size_t output_buffer_size, int output_stride) {
   const int is_external_memory = (output_buffer != NULL) ? 1 : 0;
   WebPIDecoder* idec;
 
-  if (mode >= MODE_YUV) return NULL;
+  if (csp >= MODE_YUV) return NULL;
   if (is_external_memory == 0) {    // Overwrite parameters to sane values.
     output_buffer_size = 0;
     output_stride = 0;
@@ -689,7 +689,7 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
   }
   idec = WebPINewDecoder(NULL);
   if (idec == NULL) return NULL;
-  idec->output_.colorspace = mode;
+  idec->output_.colorspace = csp;
   idec->output_.is_external_memory = is_external_memory;
   idec->output_.u.RGBA.rgba = output_buffer;
   idec->output_.u.RGBA.stride = output_stride;
diff --git a/thirdparty/libwebp/dec/io_dec.c b/thirdparty/libwebp/src/dec/io_dec.c
index 8bfab86959..e603f19c98 100644
--- a/thirdparty/libwebp/dec/io_dec.c
+++ b/thirdparty/libwebp/src/dec/io_dec.c
@@ -13,11 +13,11 @@
 
 #include <assert.h>
 #include <stdlib.h>
-#include "../dec/vp8i_dec.h"
-#include "./webpi_dec.h"
-#include "../dsp/dsp.h"
-#include "../dsp/yuv.h"
-#include "../utils/utils.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/dec/webpi_dec.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/yuv.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // Main YUV<->RGB conversion functions
@@ -212,7 +212,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
     int num_rows;
     const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
     uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     uint8_t* alpha_dst = base_rgba;
 #else
     uint8_t* alpha_dst = base_rgba + 1;
@@ -241,6 +241,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
 //------------------------------------------------------------------------------
 // YUV rescaling (no final RGB conversion needed)
 
+#if !defined(WEBP_REDUCE_SIZE)
 static int Rescale(const uint8_t* src, int src_stride,
                    int new_lines, WebPRescaler* const wrk) {
   int num_lines_out = 0;
@@ -431,7 +432,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
                                int max_lines_out) {
   const WebPRGBABuffer* const buf = &p->output->u.RGBA;
   uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   uint8_t* alpha_dst = base_rgba;
 #else
   uint8_t* alpha_dst = base_rgba + 1;
@@ -541,6 +542,8 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
   return 1;
 }
 
+#endif  // WEBP_REDUCE_SIZE
+
 //------------------------------------------------------------------------------
 // Default custom functions
 
@@ -561,10 +564,14 @@ static int CustomSetup(VP8Io* io) {
     WebPInitUpsamplers();
   }
   if (io->use_scaling) {
+#if !defined(WEBP_REDUCE_SIZE)
     const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
     if (!ok) {
       return 0;    // memory error
     }
+#else
+    return 0;   // rescaling support not compiled
+#endif
   } else {
     if (is_rgb) {
       WebPInitSamplers();
@@ -598,9 +605,6 @@ static int CustomSetup(VP8Io* io) {
     }
   }
 
-  if (is_rgb) {
-    VP8YUVInit();
-  }
   return 1;
 }
 
diff --git a/thirdparty/libwebp/dec/quant_dec.c b/thirdparty/libwebp/src/dec/quant_dec.c
index 14e3198946..f07212ad73 100644
--- a/thirdparty/libwebp/dec/quant_dec.c
+++ b/thirdparty/libwebp/src/dec/quant_dec.c
@@ -11,7 +11,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./vp8i_dec.h"
+#include "src/dec/vp8i_dec.h"
 
 static WEBP_INLINE int clip(int v, int M) {
   return v < 0 ? 0 : v > M ? M : v;
diff --git a/thirdparty/libwebp/dec/tree_dec.c b/thirdparty/libwebp/src/dec/tree_dec.c
index 9e805f60f3..3f5a957d32 100644
--- a/thirdparty/libwebp/dec/tree_dec.c
+++ b/thirdparty/libwebp/src/dec/tree_dec.c
@@ -11,15 +11,19 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./vp8i_dec.h"
-#include "../utils/bit_reader_inl_utils.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/utils/bit_reader_inl_utils.h"
 
+#if !defined(USE_GENERIC_TREE)
 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
 // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
-#define USE_GENERIC_TREE
+#define USE_GENERIC_TREE 1   // ALTERNATE_CODE
+#else
+#define USE_GENERIC_TREE 0
 #endif
+#endif  // USE_GENERIC_TREE
 
-#ifdef USE_GENERIC_TREE
+#if (USE_GENERIC_TREE == 1)
 static const int8_t kYModesIntra4[18] = {
   -B_DC_PRED, 1,
     -B_TM_PRED, 2,
@@ -317,7 +321,7 @@ static void ParseIntraMode(VP8BitReader* const br,
       int x;
       for (x = 0; x < 4; ++x) {
         const uint8_t* const prob = kBModesProba[top[x]][ymode];
-#ifdef USE_GENERIC_TREE
+#if (USE_GENERIC_TREE == 1)
         // Generic tree-parsing
         int i = kYModesIntra4[VP8GetBit(br, prob[0])];
         while (i > 0) {
@@ -335,7 +339,7 @@ static void ParseIntraMode(VP8BitReader* const br,
                         (!VP8GetBit(br, prob[6]) ? B_LD_PRED :
                           (!VP8GetBit(br, prob[7]) ? B_VL_PRED :
                             (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
-#endif    // USE_GENERIC_TREE
+#endif  // USE_GENERIC_TREE
         top[x] = ymode;
       }
       memcpy(modes, top, 4 * sizeof(*top));
@@ -498,7 +502,7 @@ static const uint8_t
 
 // Paragraph 9.9
 
-static const int kBands[16 + 1] = {
+static const uint8_t kBands[16 + 1] = {
   0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
   0  // extra entry as sentinel
 };
diff --git a/thirdparty/libwebp/dec/vp8_dec.c b/thirdparty/libwebp/src/dec/vp8_dec.c
index fad8d9cf35..6212efd179 100644
--- a/thirdparty/libwebp/dec/vp8_dec.c
+++ b/thirdparty/libwebp/src/dec/vp8_dec.c
@@ -13,12 +13,12 @@
 
 #include <stdlib.h>
 
-#include "./alphai_dec.h"
-#include "./vp8i_dec.h"
-#include "./vp8li_dec.h"
-#include "./webpi_dec.h"
-#include "../utils/bit_reader_inl_utils.h"
-#include "../utils/utils.h"
+#include "src/dec/alphai_dec.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/dec/vp8li_dec.h"
+#include "src/dec/webpi_dec.h"
+#include "src/utils/bit_reader_inl_utils.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 
diff --git a/thirdparty/libwebp/dec/vp8_dec.h b/thirdparty/libwebp/src/dec/vp8_dec.h
index b9337bbec0..ca85b340cf 100644
--- a/thirdparty/libwebp/dec/vp8_dec.h
+++ b/thirdparty/libwebp/src/dec/vp8_dec.h
@@ -11,10 +11,10 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_WEBP_DECODE_VP8_H_
-#define WEBP_WEBP_DECODE_VP8_H_
+#ifndef WEBP_DEC_VP8_DEC_H_
+#define WEBP_DEC_VP8_DEC_H_
 
-#include "../webp/decode.h"
+#include "src/webp/decode.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -33,7 +33,7 @@ extern "C" {
 //   /* customize io's functions (setup()/put()/teardown()) if needed. */
 //
 //   VP8Decoder* dec = VP8New();
-//   bool ok = VP8Decode(dec);
+//   int ok = VP8Decode(dec, &io);
 //   if (!ok) printf("Error: %s\n", VP8StatusMessage(dec));
 //   VP8Delete(dec);
 //   return ok;
@@ -157,24 +157,24 @@ void VP8Delete(VP8Decoder* const dec);
 // Miscellaneous VP8/VP8L bitstream probing functions.
 
 // Returns true if the next 3 bytes in data contain the VP8 signature.
-WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);
+WEBP_EXTERN int VP8CheckSignature(const uint8_t* const data, size_t data_size);
 
 // Validates the VP8 data-header and retrieves basic header information viz
 // width and height. Returns 0 in case of formatting error. *width/*height
 // can be passed NULL.
-WEBP_EXTERN(int) VP8GetInfo(
+WEBP_EXTERN int VP8GetInfo(
     const uint8_t* data,
     size_t data_size,    // data available so far
     size_t chunk_size,   // total data size expected in the chunk
     int* const width, int* const height);
 
 // Returns true if the next byte(s) in data is a VP8L signature.
-WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);
+WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
 
 // Validates the VP8L data-header and retrieves basic header information viz
 // width, height and alpha. Returns 0 in case of formatting error.
 // width/height/has_alpha can be passed NULL.
-WEBP_EXTERN(int) VP8LGetInfo(
+WEBP_EXTERN int VP8LGetInfo(
     const uint8_t* data, size_t data_size,  // data available so far
     int* const width, int* const height, int* const has_alpha);
 
@@ -182,4 +182,4 @@ WEBP_EXTERN(int) VP8LGetInfo(
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_WEBP_DECODE_VP8_H_ */
+#endif  /* WEBP_DEC_VP8_DEC_H_ */
diff --git a/thirdparty/libwebp/dec/vp8i_dec.h b/thirdparty/libwebp/src/dec/vp8i_dec.h
index 555853e8f8..28244d9d7a 100644
--- a/thirdparty/libwebp/dec/vp8i_dec.h
+++ b/thirdparty/libwebp/src/dec/vp8i_dec.h
@@ -11,16 +11,16 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_DEC_VP8I_H_
-#define WEBP_DEC_VP8I_H_
+#ifndef WEBP_DEC_VP8I_DEC_H_
+#define WEBP_DEC_VP8I_DEC_H_
 
 #include <string.h>     // for memcpy()
-#include "./common_dec.h"
-#include "./vp8li_dec.h"
-#include "../utils/bit_reader_utils.h"
-#include "../utils/random_utils.h"
-#include "../utils/thread_utils.h"
-#include "../dsp/dsp.h"
+#include "src/dec/common_dec.h"
+#include "src/dec/vp8li_dec.h"
+#include "src/utils/bit_reader_utils.h"
+#include "src/utils/random_utils.h"
+#include "src/utils/thread_utils.h"
+#include "src/dsp/dsp.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define DEC_MAJ_VERSION 0
 #define DEC_MIN_VERSION 6
-#define DEC_REV_VERSION 0
+#define DEC_REV_VERSION 1
 
 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
 // Constraints are: We need to store one 16x16 block of luma samples (y),
@@ -57,7 +57,6 @@ extern "C" {
 //  '|' = left sample,   '-' = top sample,    '+' = top-left sample
 //  't' = extra top-right sample for 4x4 modes
 #define YUV_SIZE (BPS * 17 + BPS * 9)
-#define Y_SIZE   (BPS * 17)
 #define Y_OFF    (BPS * 1 + 8)
 #define U_OFF    (Y_OFF + BPS * 16 + BPS)
 #define V_OFF    (U_OFF + 16)
@@ -317,4 +316,4 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_DEC_VP8I_H_ */
+#endif  /* WEBP_DEC_VP8I_DEC_H_ */
diff --git a/thirdparty/libwebp/dec/vp8l_dec.c b/thirdparty/libwebp/src/dec/vp8l_dec.c
index ef359a91f0..42ea3b5e4c 100644
--- a/thirdparty/libwebp/dec/vp8l_dec.c
+++ b/thirdparty/libwebp/src/dec/vp8l_dec.c
@@ -14,22 +14,22 @@
 
 #include <stdlib.h>
 
-#include "./alphai_dec.h"
-#include "./vp8li_dec.h"
-#include "../dsp/dsp.h"
-#include "../dsp/lossless.h"
-#include "../dsp/lossless_common.h"
-#include "../dsp/yuv.h"
-#include "../utils/endian_inl_utils.h"
-#include "../utils/huffman_utils.h"
-#include "../utils/utils.h"
+#include "src/dec/alphai_dec.h"
+#include "src/dec/vp8li_dec.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
+#include "src/dsp/yuv.h"
+#include "src/utils/endian_inl_utils.h"
+#include "src/utils/huffman_utils.h"
+#include "src/utils/utils.h"
 
 #define NUM_ARGB_CACHE_ROWS          16
 
 static const int kCodeLengthLiterals = 16;
 static const int kCodeLengthRepeatCode = 16;
-static const int kCodeLengthExtraBits[3] = { 2, 3, 7 };
-static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
+static const uint8_t kCodeLengthExtraBits[3] = { 2, 3, 7 };
+static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
 
 // -----------------------------------------------------------------------------
 //  Five Huffman codes are used at each meta code:
@@ -86,7 +86,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
 // All values computed for 8-bit first level lookup with Mark Adler's tool:
 // http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
 #define FIXED_TABLE_SIZE (630 * 3 + 410)
-static const int kTableSize[12] = {
+static const uint16_t kTableSize[12] = {
   FIXED_TABLE_SIZE + 654,
   FIXED_TABLE_SIZE + 656,
   FIXED_TABLE_SIZE + 658,
@@ -485,6 +485,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
 //------------------------------------------------------------------------------
 // Scaling.
 
+#if !defined(WEBP_REDUCE_SIZE)
 static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
   const int num_channels = 4;
   const int in_width = io->mb_w;
@@ -516,10 +517,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
                    out_width, out_height, 0, num_channels, work);
   return 1;
 }
+#endif   // WEBP_REDUCE_SIZE
 
 //------------------------------------------------------------------------------
 // Export to ARGB
 
+#if !defined(WEBP_REDUCE_SIZE)
+
 // We have special "export" function since we need to convert from BGRA
 static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
                   int rgba_stride, uint8_t* const rgba) {
@@ -561,6 +565,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
   return num_lines_out;
 }
 
+#endif   // WEBP_REDUCE_SIZE
+
 // Emit rows without any scaling.
 static int EmitRows(WEBP_CSP_MODE colorspace,
                     const uint8_t* row_in, int in_stride,
@@ -746,9 +752,12 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
       if (WebPIsRGBMode(output->colorspace)) {  // convert to RGBA
         const WebPRGBABuffer* const buf = &output->u.RGBA;
         uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
-        const int num_rows_out = io->use_scaling ?
+        const int num_rows_out =
+#if !defined(WEBP_REDUCE_SIZE)
+         io->use_scaling ?
             EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
                                  rgba, buf->stride) :
+#endif  // WEBP_REDUCE_SIZE
             EmitRows(output->colorspace, rows_data, in_stride,
                      io->mb_w, io->mb_h, rgba, buf->stride);
         // Update 'last_out_row_'.
@@ -1012,12 +1021,13 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
       ok = 0;
       goto End;
     }
-    assert(br->eos_ == VP8LIsEndOfStream(br));
+    br->eos_ = VP8LIsEndOfStream(br);
   }
   // Process the remaining rows corresponding to last row-block.
   ExtractPalettedAlphaRows(dec, row > last_row ? last_row : row);
 
  End:
+  br->eos_ = VP8LIsEndOfStream(br);
   if (!ok || (br->eos_ && pos < end)) {
     ok = 0;
     dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED
@@ -1090,11 +1100,12 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
     VP8LFillBitWindow(br);
     if (htree_group->use_packed_table) {
       code = ReadPackedSymbols(htree_group, br, src);
+      if (VP8LIsEndOfStream(br)) break;
       if (code == PACKED_NON_LITERAL_CODE) goto AdvanceByOne;
     } else {
       code = ReadSymbol(htree_group->htrees[GREEN], br);
     }
-    if (br->eos_) break;  // early out
+    if (VP8LIsEndOfStream(br)) break;
     if (code < NUM_LITERAL_CODES) {  // Literal
       if (htree_group->is_trivial_literal) {
         *src = htree_group->literal_arb | (code << 8);
@@ -1104,7 +1115,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
         VP8LFillBitWindow(br);
         blue = ReadSymbol(htree_group->htrees[BLUE], br);
         alpha = ReadSymbol(htree_group->htrees[ALPHA], br);
-        if (br->eos_) break;
+        if (VP8LIsEndOfStream(br)) break;
         *src = ((uint32_t)alpha << 24) | (red << 16) | (code << 8) | blue;
       }
     AdvanceByOne:
@@ -1132,7 +1143,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
       VP8LFillBitWindow(br);
       dist_code = GetCopyDistance(dist_symbol, br);
       dist = PlaneCodeToDistance(width, dist_code);
-      if (br->eos_) break;
+      if (VP8LIsEndOfStream(br)) break;
       if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) {
         goto Error;
       } else {
@@ -1169,9 +1180,9 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
     } else {  // Not reached
       goto Error;
     }
-    assert(br->eos_ == VP8LIsEndOfStream(br));
   }
 
+  br->eos_ = VP8LIsEndOfStream(br);
   if (dec->incremental_ && br->eos_ && src < src_end) {
     RestoreState(dec);
   } else if (!br->eos_) {
@@ -1630,12 +1641,19 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
 
     if (!AllocateInternalBuffers32b(dec, io->width)) goto Err;
 
+#if !defined(WEBP_REDUCE_SIZE)
     if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
 
     if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) {
       // need the alpha-multiply functions for premultiplied output or rescaling
       WebPInitAlphaProcessing();
     }
+#else
+    if (io->use_scaling) {
+      dec->status_ = VP8_STATUS_INVALID_PARAM;
+      goto Err;
+    }
+#endif
     if (!WebPIsRGBMode(dec->output_->colorspace)) {
       WebPInitConvertARGBToYUV();
       if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing();
diff --git a/thirdparty/libwebp/dec/vp8li_dec.h b/thirdparty/libwebp/src/dec/vp8li_dec.h
index 097a9d0589..8e500cf9ff 100644
--- a/thirdparty/libwebp/dec/vp8li_dec.h
+++ b/thirdparty/libwebp/src/dec/vp8li_dec.h
@@ -12,14 +12,14 @@
 // Author: Skal (pascal.massimino@gmail.com)
 //         Vikas Arora(vikaas.arora@gmail.com)
 
-#ifndef WEBP_DEC_VP8LI_H_
-#define WEBP_DEC_VP8LI_H_
+#ifndef WEBP_DEC_VP8LI_DEC_H_
+#define WEBP_DEC_VP8LI_DEC_H_
 
 #include <string.h>     // for memcpy()
-#include "./webpi_dec.h"
-#include "../utils/bit_reader_utils.h"
-#include "../utils/color_cache_utils.h"
-#include "../utils/huffman_utils.h"
+#include "src/dec/webpi_dec.h"
+#include "src/utils/bit_reader_utils.h"
+#include "src/utils/color_cache_utils.h"
+#include "src/utils/huffman_utils.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -132,4 +132,4 @@ void VP8LDelete(VP8LDecoder* const dec);
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_DEC_VP8LI_H_ */
+#endif  /* WEBP_DEC_VP8LI_DEC_H_ */
diff --git a/thirdparty/libwebp/dec/webp_dec.c b/thirdparty/libwebp/src/dec/webp_dec.c
index a8e9c2c510..42d098874d 100644
--- a/thirdparty/libwebp/dec/webp_dec.c
+++ b/thirdparty/libwebp/src/dec/webp_dec.c
@@ -13,11 +13,11 @@
 
 #include <stdlib.h>
 
-#include "./vp8i_dec.h"
-#include "./vp8li_dec.h"
-#include "./webpi_dec.h"
-#include "../utils/utils.h"
-#include "../webp/mux_types.h"  // ALPHA_FLAG
+#include "src/dec/vp8i_dec.h"
+#include "src/dec/vp8li_dec.h"
+#include "src/dec/webpi_dec.h"
+#include "src/utils/utils.h"
+#include "src/webp/mux_types.h"  // ALPHA_FLAG
 
 //------------------------------------------------------------------------------
 // RIFF layout is:
@@ -421,7 +421,9 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
                                 NULL, NULL, NULL, &has_animation,
                                 NULL, headers);
   if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
-    // TODO(jzern): full support of animation frames will require API additions.
+    // The WebPDemux API + libwebp can be used to decode individual
+    // uncomposited frames or the WebPAnimDecoder can be used to fully
+    // reconstruct them (see webp/demux.h).
     if (has_animation) {
       status = VP8_STATUS_UNSUPPORTED_FEATURE;
     }
diff --git a/thirdparty/libwebp/dec/webpi_dec.h b/thirdparty/libwebp/src/dec/webpi_dec.h
index 696abc1958..c378ba6fc3 100644
--- a/thirdparty/libwebp/dec/webpi_dec.h
+++ b/thirdparty/libwebp/src/dec/webpi_dec.h
@@ -11,15 +11,15 @@
 //
 // Author: somnath@google.com (Somnath Banerjee)
 
-#ifndef WEBP_DEC_WEBPI_H_
-#define WEBP_DEC_WEBPI_H_
+#ifndef WEBP_DEC_WEBPI_DEC_H_
+#define WEBP_DEC_WEBPI_DEC_H_
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "../utils/rescaler_utils.h"
-#include "./vp8_dec.h"
+#include "src/utils/rescaler_utils.h"
+#include "src/dec/vp8_dec.h"
 
 //------------------------------------------------------------------------------
 // WebPDecParams: Decoding output parameters. Transient internal object.
@@ -130,4 +130,4 @@ int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_DEC_WEBPI_H_ */
+#endif  /* WEBP_DEC_WEBPI_DEC_H_ */
diff --git a/thirdparty/libwebp/demux/anim_decode.c b/thirdparty/libwebp/src/demux/anim_decode.c
index f1cf176e72..05dd707371 100644
--- a/thirdparty/libwebp/demux/anim_decode.c
+++ b/thirdparty/libwebp/src/demux/anim_decode.c
@@ -11,15 +11,15 @@
 //
 
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
 #include <assert.h>
 #include <string.h>
 
-#include "../utils/utils.h"
-#include "../webp/decode.h"
-#include "../webp/demux.h"
+#include "src/utils/utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/demux.h"
 
 #define NUM_CHANNELS 4
 
diff --git a/thirdparty/libwebp/demux/demux.c b/thirdparty/libwebp/src/demux/demux.c
index 100eab8c01..79c24a5a7f 100644
--- a/thirdparty/libwebp/demux/demux.c
+++ b/thirdparty/libwebp/src/demux/demux.c
@@ -11,21 +11,21 @@
 //
 
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
-#include "../utils/utils.h"
-#include "../webp/decode.h"     // WebPGetFeatures
-#include "../webp/demux.h"
-#include "../webp/format_constants.h"
+#include "src/utils/utils.h"
+#include "src/webp/decode.h"     // WebPGetFeatures
+#include "src/webp/demux.h"
+#include "src/webp/format_constants.h"
 
 #define DMUX_MAJ_VERSION 0
 #define DMUX_MIN_VERSION 3
-#define DMUX_REV_VERSION 2
+#define DMUX_REV_VERSION 3
 
 typedef struct {
   size_t start_;        // start location of the data
@@ -205,12 +205,14 @@ static void SetFrameInfo(size_t start_offset, size_t size,
   frame->complete_ = complete;
 }
 
-// Store image bearing chunks to 'frame'.
+// Store image bearing chunks to 'frame'. 'min_size' is an optional size
+// requirement, it may be zero.
 static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
                               MemBuffer* const mem, Frame* const frame) {
   int alpha_chunks = 0;
   int image_chunks = 0;
-  int done = (MemDataSize(mem) < min_size);
+  int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
+              MemDataSize(mem) < min_size);
   ParseStatus status = PARSE_OK;
 
   if (done) return PARSE_NEED_MORE_DATA;
@@ -401,9 +403,9 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
   frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
   if (frame == NULL) return PARSE_ERROR;
 
-  // For the single image case we allow parsing of a partial frame, but we need
-  // at least CHUNK_HEADER_SIZE for parsing.
-  status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame);
+  // For the single image case we allow parsing of a partial frame, so no
+  // minimum size is imposed here.
+  status = StoreFrame(1, 0, &dmux->mem_, frame);
   if (status != PARSE_ERROR) {
     const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
     // Clear any alpha when the alpha flag is missing.
diff --git a/thirdparty/libwebp/dsp/alpha_processing.c b/thirdparty/libwebp/src/dsp/alpha_processing.c
index 4b60e092be..590e3bc312 100644
--- a/thirdparty/libwebp/dsp/alpha_processing.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing.c
@@ -12,10 +12,13 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <assert.h>
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 // Tables can be faster on some platform but incur some extra binary size (~2k).
-// #define USE_TABLES_FOR_ALPHA_MULT
+#if !defined(USE_TABLES_FOR_ALPHA_MULT)
+#define USE_TABLES_FOR_ALPHA_MULT 0   // ALTERNATE_CODE
+#endif
+
 
 // -----------------------------------------------------------------------------
 
@@ -29,7 +32,7 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
   return v;
 }
 
-#ifdef USE_TABLES_FOR_ALPHA_MULT
+#if (USE_TABLES_FOR_ALPHA_MULT == 1)
 
 static const uint32_t kMultTables[2][256] = {
   {    // (255u << MFIX) / alpha
@@ -132,9 +135,9 @@ static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
   return inverse ? (255u << MFIX) / a : a * KINV_255;
 }
 
-#endif    // USE_TABLES_FOR_ALPHA_MULT
+#endif  // USE_TABLES_FOR_ALPHA_MULT
 
-void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
+void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
   int x;
   for (x = 0; x < width; ++x) {
     const uint32_t argb = ptr[x];
@@ -154,8 +157,8 @@ void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
   }
 }
 
-void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
-                  int width, int inverse) {
+void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
+                   int width, int inverse) {
   int x;
   for (x = 0; x < width; ++x) {
     const uint32_t a = alpha[x];
@@ -217,8 +220,9 @@ void WebPMultRows(uint8_t* ptr, int stride,
 #define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
 #endif
 
-static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
-                               int w, int h, int stride) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
+                                 int w, int h, int stride) {
   while (h-- > 0) {
     uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
     const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
@@ -235,6 +239,7 @@ static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
     rgba += stride;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 #undef MULTIPLIER
 #undef PREMULTIPLY
 
@@ -254,9 +259,9 @@ static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
   return (x * m) >> 16;
 }
 
-static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
-                                               int w, int h, int stride,
-                                               int rg_byte_pos /* 0 or 1 */) {
+static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
+                                                 int w, int h, int stride,
+                                                 int rg_byte_pos /* 0 or 1 */) {
   while (h-- > 0) {
     int i;
     for (i = 0; i < w; ++i) {
@@ -275,15 +280,16 @@ static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
 }
 #undef MULTIPLIER
 
-static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
-                                   int w, int h, int stride) {
-#ifdef WEBP_SWAP_16BIT_CSP
-  ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);
+static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
+                                     int w, int h, int stride) {
+#if (WEBP_SWAP_16BIT_CSP == 1)
+  ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
 #else
-  ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);
+  ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0);
 #endif
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
                            int width, int height,
                            uint8_t* dst, int dst_stride) {
@@ -338,6 +344,36 @@ static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
   int i;
   for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
+
+//------------------------------------------------------------------------------
+// Returns 1 if any of the 'length' bytes at 'src' differs from 0xff, else 0.
+static int HasAlpha8b_C(const uint8_t* src, int length) {
+  while (length-- > 0) if (*src++ != 0xff) return 1;
+  return 0;
+}
+// Returns 1 if any of src[0], src[4], ... ('length' bytes in all) is not 0xff.
+static int HasAlpha32b_C(const uint8_t* src, int length) {
+  int x;
+  for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+// Simple channel manipulations.
+
+static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
+  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);  // a:24 r:16 g:8 b:0
+}
+// Sets out[i] = MakeARGB32(0xff, r, g, b) for i < len; inputs read at i * step.
+static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
+                      int len, int step, uint32_t* out) {
+  int i, offset = 0;
+  for (i = 0; i < len; ++i) {
+    out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
+    offset += step;  // the same offset indexes all three inputs
+  }
+}
 
 void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
 void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
@@ -345,6 +381,11 @@ int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
 void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
 int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
 void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
+void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
+                    int len, int step, uint32_t* out);
+
+int (*WebPHasAlpha8b)(const uint8_t* src, int length);
+int (*WebPHasAlpha32b)(const uint8_t* src, int length);
 
 //------------------------------------------------------------------------------
 // Init function
@@ -360,15 +401,21 @@ static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
   if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;
 
-  WebPMultARGBRow = WebPMultARGBRowC;
-  WebPMultRow = WebPMultRowC;
-  WebPApplyAlphaMultiply = ApplyAlphaMultiply;
-  WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
+  WebPMultARGBRow = WebPMultARGBRow_C;
+  WebPMultRow = WebPMultRow_C;
+  WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C;
 
+  WebPPackRGB = PackRGB_C;
+#if !WEBP_NEON_OMIT_C_CODE
+  WebPApplyAlphaMultiply = ApplyAlphaMultiply_C;
   WebPDispatchAlpha = DispatchAlpha_C;
   WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C;
   WebPExtractAlpha = ExtractAlpha_C;
   WebPExtractGreen = ExtractGreen_C;
+#endif
+
+  WebPHasAlpha8b = HasAlpha8b_C;
+  WebPHasAlpha32b = HasAlpha32b_C;
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
@@ -382,16 +429,31 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
 #endif
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      WebPInitAlphaProcessingNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
     if (VP8GetCPUInfo(kMIPSdspR2)) {
       WebPInitAlphaProcessingMIPSdspR2();
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    WebPInitAlphaProcessingNEON();
+  }
+#endif
+
+  assert(WebPMultARGBRow != NULL);
+  assert(WebPMultRow != NULL);
+  assert(WebPApplyAlphaMultiply != NULL);
+  assert(WebPApplyAlphaMultiply4444 != NULL);
+  assert(WebPDispatchAlpha != NULL);
+  assert(WebPDispatchAlphaToGreen != NULL);
+  assert(WebPExtractAlpha != NULL);
+  assert(WebPExtractGreen != NULL);
+  assert(WebPPackRGB != NULL);
+  assert(WebPHasAlpha8b != NULL);
+  assert(WebPHasAlpha32b != NULL);
+
   alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/thirdparty/libwebp/dsp/alpha_processing_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/alpha_processing_mips_dsp_r2.c
index c631d78905..e0dc91bab9 100644
--- a/thirdparty/libwebp/dsp/alpha_processing_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_mips_dsp_r2.c
@@ -12,13 +12,13 @@
 // Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
 //            Djordje Pesut  (djordje.pesut@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS_DSP_R2)
 
-static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
-                         int width, int height,
-                         uint8_t* dst, int dst_stride) {
+static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
+                                   int width, int height,
+                                   uint8_t* dst, int dst_stride) {
   uint32_t alpha_mask = 0xffffffff;
   int i, j, temp0;
 
@@ -79,7 +79,8 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
   return (alpha_mask != 0xff);
 }
 
-static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
+static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
+                                  int inverse) {
   int x;
   const uint32_t c_00ffffff = 0x00ffffffu;
   const uint32_t c_ff000000 = 0xff000000u;
@@ -124,14 +125,54 @@ static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
   }
 }
 
+static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
+                              const uint8_t* b, int len, int step,
+                              uint32_t* out) {
+  int temp0, temp1, temp2, offset;
+  const int rest = len & 1;  // 1 when 'len' is odd
+  const int a = 0xff;  // bound to asm operand [a]
+  const uint32_t* const loop_end = out + len - rest;  // 'len' rounded to even
+  __asm__ volatile (
+    "xor          %[offset],   %[offset], %[offset]    \n\t"
+    "beq          %[loop_end], %[out],    0f           \n\t"
+  "2:                                                  \n\t"  // one store/pass
+    "lbux         %[temp0],    %[offset](%[r])         \n\t"
+    "lbux         %[temp1],    %[offset](%[g])         \n\t"
+    "lbux         %[temp2],    %[offset](%[b])         \n\t"
+    "ins          %[temp0],    %[a],      16,     16   \n\t"
+    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
+    "addiu        %[out],      %[out],    4            \n\t"
+    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
+    "sw           %[temp0],    -4(%[out])              \n\t"
+    "addu         %[offset],   %[offset], %[step]      \n\t"
+    "bne          %[loop_end], %[out],    2b           \n\t"
+  "0:                                                  \n\t"  // odd remainder
+    "beq          %[rest],     $zero,     1f           \n\t"
+    "lbux         %[temp0],    %[offset](%[r])         \n\t"
+    "lbux         %[temp1],    %[offset](%[g])         \n\t"
+    "lbux         %[temp2],    %[offset](%[b])         \n\t"
+    "ins          %[temp0],    %[a],      16,     16   \n\t"
+    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
+    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
+    "sw           %[temp0],    0(%[out])               \n\t"
+  "1:                                                  \n\t"  // exit
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [offset]"=&r"(offset), [out]"+&r"(out)
+    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
+      [loop_end]"r"(loop_end), [rest]"r"(rest)
+    : "memory"
+  );
+}
+
 //------------------------------------------------------------------------------
 // Entry point
 
 extern void WebPInitAlphaProcessingMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
-  WebPDispatchAlpha = DispatchAlpha;
-  WebPMultARGBRow = MultARGBRow;
+  WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
+  WebPMultARGBRow = MultARGBRow_MIPSdspR2;
+  WebPPackRGB = PackRGB_MIPSdspR2;
 }
 
 #else  // !WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/dsp/alpha_processing_neon.c b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
index 606a401cf7..9d55421704 100644
--- a/thirdparty/libwebp/dsp/alpha_processing_neon.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
@@ -11,11 +11,11 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_NEON)
 
-#include "./neon.h"
+#include "src/dsp/neon.h"
 
 //------------------------------------------------------------------------------
 
diff --git a/thirdparty/libwebp/dsp/alpha_processing_sse2.c b/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c
index 83dc559fac..76587006a1 100644
--- a/thirdparty/libwebp/dsp/alpha_processing_sse2.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c
@@ -11,16 +11,16 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>
 
 //------------------------------------------------------------------------------
 
-static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
-                         int width, int height,
-                         uint8_t* dst, int dst_stride) {
+static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
+                              int width, int height,
+                              uint8_t* dst, int dst_stride) {
   // alpha_and stores an 'and' operation of all the alpha[] values. The final
   // value is not 0xff if any of the alpha[] is not equal to 0xff.
   uint32_t alpha_and = 0xff;
@@ -72,9 +72,9 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
   return (alpha_and != 0xff);
 }
 
-static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
-                                 int width, int height,
-                                 uint32_t* dst, int dst_stride) {
+static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
+                                      int width, int height,
+                                      uint32_t* dst, int dst_stride) {
   int i, j;
   const __m128i zero = _mm_setzero_si128();
   const int limit = width & ~15;
@@ -98,9 +98,9 @@ static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
   }
 }
 
-static int ExtractAlpha(const uint8_t* argb, int argb_stride,
-                        int width, int height,
-                        uint8_t* alpha, int alpha_stride) {
+static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
+                             int width, int height,
+                             uint8_t* alpha, int alpha_stride) {
   // alpha_and stores an 'and' operation of all the alpha[] values. The final
   // value is not 0xff if any of the alpha[] is not equal to 0xff.
   uint32_t alpha_and = 0xff;
@@ -210,6 +210,61 @@ static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
 #undef MULTIPLIER
 #undef PREMULTIPLY
 
+//------------------------------------------------------------------------------
+// Alpha detection
+
+static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
+  const __m128i all_0xff = _mm_set1_epi8(0xff);  // NOTE(review): (char)0xff?
+  int i = 0;
+  for (; i + 16 <= length; i += 16) {  // 16 bytes per unaligned load
+    const __m128i v = _mm_loadu_si128((const __m128i*)(src + i));
+    const __m128i bits = _mm_cmpeq_epi8(v, all_0xff);
+    const int mask = _mm_movemask_epi8(bits);
+    if (mask != 0xffff) return 1;  // at least one byte is not 0xff
+  }
+  for (; i < length; ++i) if (src[i] != 0xff) return 1;  // leftover bytes
+  return 0;
+}
+// Returns 1 if any of src[0], src[4], ... ('length' bytes in all) is not 0xff.
+static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
+  const __m128i alpha_mask = _mm_set1_epi32(0xff);
+  const __m128i all_0xff = _mm_set1_epi8(0xff);  // NOTE(review): (char)0xff?
+  int i = 0;
+  // The last byte examined is 'src[4 * length - 4]'; the 3 bytes after it may
+  // not be readable (it is unknown whether the examined byte is the first or
+  // the last of its quadruplet). Hence the '-3' protection below.
+  length = length * 4 - 3;   // size in bytes
+  for (; i + 64 <= length; i += 64) {  // 16 examined bytes per pass
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i +  0));
+    const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
+    const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
+    const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
+    const __m128i b0 = _mm_and_si128(a0, alpha_mask);
+    const __m128i b1 = _mm_and_si128(a1, alpha_mask);
+    const __m128i b2 = _mm_and_si128(a2, alpha_mask);
+    const __m128i b3 = _mm_and_si128(a3, alpha_mask);
+    const __m128i c0 = _mm_packs_epi32(b0, b1);
+    const __m128i c1 = _mm_packs_epi32(b2, b3);
+    const __m128i d  = _mm_packus_epi16(c0, c1);
+    const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
+    const int mask = _mm_movemask_epi8(bits);
+    if (mask != 0xffff) return 1;
+  }
+  for (; i + 32 <= length; i += 32) {  // 8 examined bytes per pass
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i +  0));
+    const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
+    const __m128i b0 = _mm_and_si128(a0, alpha_mask);
+    const __m128i b1 = _mm_and_si128(a1, alpha_mask);
+    const __m128i c  = _mm_packs_epi32(b0, b1);
+    const __m128i d  = _mm_packus_epi16(c, c);  // same 8 values in both halves
+    const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
+    const int mask = _mm_movemask_epi8(bits);
+    if (mask != 0xffff) return 1;
+  }
+  for (; i <= length; i += 4) if (src[i] != 0xff) return 1;  // remaining bytes
+  return 0;
+}
+
 // -----------------------------------------------------------------------------
 // Apply alpha value to rows
 
@@ -238,7 +293,7 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
     }
   }
   width -= x;
-  if (width > 0) WebPMultARGBRowC(ptr + x, width, inverse);
+  if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse);
 }
 
 static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
@@ -261,7 +316,7 @@ static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
     }
   }
   width -= x;
-  if (width > 0) WebPMultRowC(ptr + x, alpha + x, width, inverse);
+  if (width > 0) WebPMultRow_C(ptr + x, alpha + x, width, inverse);
 }
 
 //------------------------------------------------------------------------------
@@ -273,9 +328,12 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
   WebPMultARGBRow = MultARGBRow_SSE2;
   WebPMultRow = MultRow_SSE2;
   WebPApplyAlphaMultiply = ApplyAlphaMultiply_SSE2;
-  WebPDispatchAlpha = DispatchAlpha;
-  WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
-  WebPExtractAlpha = ExtractAlpha;
+  WebPDispatchAlpha = DispatchAlpha_SSE2;
+  WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
+  WebPExtractAlpha = ExtractAlpha_SSE2;
+
+  WebPHasAlpha8b = HasAlpha8b_SSE2;
+  WebPHasAlpha32b = HasAlpha32b_SSE2;
 }
 
 #else  // !WEBP_USE_SSE2
diff --git a/thirdparty/libwebp/dsp/alpha_processing_sse41.c b/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c
index 986fde94ed..56040f9c88 100644
--- a/thirdparty/libwebp/dsp/alpha_processing_sse41.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c
@@ -11,7 +11,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE41)
 
@@ -19,9 +19,9 @@
 
 //------------------------------------------------------------------------------
 
-static int ExtractAlpha(const uint8_t* argb, int argb_stride,
-                        int width, int height,
-                        uint8_t* alpha, int alpha_stride) {
+static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
+                              int width, int height,
+                              uint8_t* alpha, int alpha_stride) {
   // alpha_and stores an 'and' operation of all the alpha[] values. The final
   // value is not 0xff if any of the alpha[] is not equal to 0xff.
   uint32_t alpha_and = 0xff;
@@ -82,7 +82,7 @@ static int ExtractAlpha(const uint8_t* argb, int argb_stride,
 extern void WebPInitAlphaProcessingSSE41(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
-  WebPExtractAlpha = ExtractAlpha;
+  WebPExtractAlpha = ExtractAlpha_SSE41;
 }
 
 #else  // !WEBP_USE_SSE41
diff --git a/thirdparty/libwebp/dsp/common_sse2.h b/thirdparty/libwebp/src/dsp/common_sse2.h
index 995d7cf4ea..995d7cf4ea 100644
--- a/thirdparty/libwebp/dsp/common_sse2.h
+++ b/thirdparty/libwebp/src/dsp/common_sse2.h
diff --git a/thirdparty/libwebp/dsp/cost.c b/thirdparty/libwebp/src/dsp/cost.c
index 58ddea7248..a732389d58 100644
--- a/thirdparty/libwebp/dsp/cost.c
+++ b/thirdparty/libwebp/src/dsp/cost.c
@@ -9,8 +9,8 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
-#include "../enc/cost_enc.h"
+#include "src/dsp/dsp.h"
+#include "src/enc/cost_enc.h"
 
 //------------------------------------------------------------------------------
 // Boolean-cost cost table
@@ -319,7 +319,7 @@ const uint8_t VP8EncBands[16 + 1] = {
 //------------------------------------------------------------------------------
 // Mode costs
 
-static int GetResidualCost(int ctx0, const VP8Residual* const res) {
+static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
   int n = res->first;
   // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
   const int p0 = res->prob[n][ctx0][0];
@@ -354,8 +354,8 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
   return cost;
 }
 
-static void SetResidualCoeffs(const int16_t* const coeffs,
-                              VP8Residual* const res) {
+static void SetResidualCoeffs_C(const int16_t* const coeffs,
+                                VP8Residual* const res) {
   int n;
   res->last = -1;
   assert(res->first == 0 || coeffs[0] == 0);
@@ -384,8 +384,8 @@ static volatile VP8CPUInfo cost_last_cpuinfo_used =
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) {
   if (cost_last_cpuinfo_used == VP8GetCPUInfo) return;
 
-  VP8GetResidualCost = GetResidualCost;
-  VP8SetResidualCoeffs = SetResidualCoeffs;
+  VP8GetResidualCost = GetResidualCost_C;
+  VP8SetResidualCoeffs = SetResidualCoeffs_C;
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
diff --git a/thirdparty/libwebp/dsp/cost_mips32.c b/thirdparty/libwebp/src/dsp/cost_mips32.c
index 3102da877a..0500f88c13 100644
--- a/thirdparty/libwebp/dsp/cost_mips32.c
+++ b/thirdparty/libwebp/src/dsp/cost_mips32.c
@@ -9,13 +9,13 @@
 //
 // Author: Djordje Pesut (djordje.pesut@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS32)
 
-#include "../enc/cost_enc.h"
+#include "src/enc/cost_enc.h"
 
-static int GetResidualCost(int ctx0, const VP8Residual* const res) {
+static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
   int temp0, temp1;
   int v_reg, ctx_reg;
   int n = res->first;
@@ -96,8 +96,8 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
   return cost;
 }
 
-static void SetResidualCoeffs(const int16_t* const coeffs,
-                              VP8Residual* const res) {
+static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
+                                     VP8Residual* const res) {
   const int16_t* p_coeffs = (int16_t*)coeffs;
   int temp0, temp1, temp2, n, n1;
   assert(res->first == 0 || coeffs[0] == 0);
@@ -143,8 +143,8 @@ static void SetResidualCoeffs(const int16_t* const coeffs,
 extern void VP8EncDspCostInitMIPS32(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) {
-  VP8GetResidualCost = GetResidualCost;
-  VP8SetResidualCoeffs = SetResidualCoeffs;
+  VP8GetResidualCost = GetResidualCost_MIPS32;
+  VP8SetResidualCoeffs = SetResidualCoeffs_MIPS32;
 }
 
 #else  // !WEBP_USE_MIPS32
diff --git a/thirdparty/libwebp/dsp/cost_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/cost_mips_dsp_r2.c
index 6ec8aeb610..51248de7a1 100644
--- a/thirdparty/libwebp/dsp/cost_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/cost_mips_dsp_r2.c
@@ -9,13 +9,13 @@
 //
 // Author: Djordje Pesut (djordje.pesut@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS_DSP_R2)
 
-#include "../enc/cost_enc.h"
+#include "src/enc/cost_enc.h"
 
-static int GetResidualCost(int ctx0, const VP8Residual* const res) {
+static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
   int temp0, temp1;
   int v_reg, ctx_reg;
   int n = res->first;
@@ -97,7 +97,7 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
 extern void VP8EncDspCostInitMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void) {
-  VP8GetResidualCost = GetResidualCost;
+  VP8GetResidualCost = GetResidualCost_MIPSdspR2;
 }
 
 #else  // !WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/dsp/cost_sse2.c b/thirdparty/libwebp/src/dsp/cost_sse2.c
index 421d51fdd5..487a079921 100644
--- a/thirdparty/libwebp/dsp/cost_sse2.c
+++ b/thirdparty/libwebp/src/dsp/cost_sse2.c
@@ -11,19 +11,19 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE2)
 #include <emmintrin.h>
 
-#include "../enc/cost_enc.h"
-#include "../enc/vp8i_enc.h"
-#include "../utils/utils.h"
+#include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 
-static void SetResidualCoeffsSSE2(const int16_t* const coeffs,
-                                  VP8Residual* const res) {
+static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
+                                   VP8Residual* const res) {
   const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
   const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
   // Use SSE2 to compare 16 values with a single instruction.
@@ -42,7 +42,7 @@ static void SetResidualCoeffsSSE2(const int16_t* const coeffs,
   res->coeffs = coeffs;
 }
 
-static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
+static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
   uint8_t levels[16], ctxs[16];
   uint16_t abs_levels[16];
   int n = res->first;
@@ -108,8 +108,8 @@ static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
 extern void VP8EncDspCostInitSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {
-  VP8SetResidualCoeffs = SetResidualCoeffsSSE2;
-  VP8GetResidualCost = GetResidualCostSSE2;
+  VP8SetResidualCoeffs = SetResidualCoeffs_SSE2;
+  VP8GetResidualCost = GetResidualCost_SSE2;
 }
 
 #else  // !WEBP_USE_SSE2
diff --git a/thirdparty/libwebp/dsp/cpu.c b/thirdparty/libwebp/src/dsp/cpu.c
index b5583b6e9b..8b40feed29 100644
--- a/thirdparty/libwebp/dsp/cpu.c
+++ b/thirdparty/libwebp/src/dsp/cpu.c
@@ -11,7 +11,7 @@
 //
 // Author: Christian Duvivier (cduvivier@google.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_HAVE_NEON_RTCD)
 #include <stdio.h>
@@ -143,7 +143,7 @@ static int x86CPUInfo(CPUFeature feature) {
     return !!(cpu_info[2] & (1 << 0));
   }
   if (feature == kSlowSSSE3) {
-    if (is_intel && (cpu_info[2] & (1 << 0))) {   // SSSE3?
+    if (is_intel && (cpu_info[2] & (1 << 9))) {   // SSSE3?
       return CheckSlowModel(cpu_info[0]);
     }
     return 0;
diff --git a/thirdparty/libwebp/dsp/dec.c b/thirdparty/libwebp/src/dsp/dec.c
index 007e985d8b..7e82407567 100644
--- a/thirdparty/libwebp/dsp/dec.c
+++ b/thirdparty/libwebp/src/dsp/dec.c
@@ -11,9 +11,11 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
-#include "../dec/vp8i_dec.h"
-#include "../utils/utils.h"
+#include <assert.h>
+
+#include "src/dsp/dsp.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 
@@ -25,7 +27,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 // Transforms (Paragraph 14.4)
 
 #define STORE(x, y, v) \
-  dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
+  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
 
 #define STORE2(y, dc, d, c) do {    \
   const int DC = (dc);              \
@@ -38,7 +40,8 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 #define MUL1(a) ((((a) * 20091) >> 16) + (a))
 #define MUL2(a) (((a) * 35468) >> 16)
 
-static void TransformOne(const int16_t* in, uint8_t* dst) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void TransformOne_C(const int16_t* in, uint8_t* dst) {
   int C[4 * 4], *tmp;
   int i;
   tmp = C;
@@ -78,7 +81,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
 }
 
 // Simplified transform when only in[0], in[1] and in[4] are non-zero
-static void TransformAC3(const int16_t* in, uint8_t* dst) {
+static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
   const int a = in[0] + 4;
   const int c4 = MUL2(in[4]);
   const int d4 = MUL1(in[4]);
@@ -93,19 +96,21 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
 #undef MUL2
 #undef STORE2
 
-static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
-  TransformOne(in, dst);
+static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
+  TransformOne_C(in, dst);
   if (do_two) {
-    TransformOne(in + 16, dst + 4);
+    TransformOne_C(in + 16, dst + 4);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void TransformUV(const int16_t* in, uint8_t* dst) {
+static void TransformUV_C(const int16_t* in, uint8_t* dst) {
   VP8Transform(in + 0 * 16, dst, 1);
   VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
 }
 
-static void TransformDC(const int16_t* in, uint8_t* dst) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void TransformDC_C(const int16_t* in, uint8_t* dst) {
   const int DC = in[0] + 4;
   int i, j;
   for (j = 0; j < 4; ++j) {
@@ -114,8 +119,9 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
     }
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void TransformDCUV(const int16_t* in, uint8_t* dst) {
+static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
   if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
   if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
   if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
@@ -127,7 +133,8 @@ static void TransformDCUV(const int16_t* in, uint8_t* dst) {
 //------------------------------------------------------------------------------
 // Paragraph 14.3
 
-static void TransformWHT(const int16_t* in, int16_t* out) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void TransformWHT_C(const int16_t* in, int16_t* out) {
   int tmp[16];
   int i;
   for (i = 0; i < 4; ++i) {
@@ -153,6 +160,7 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
     out += 64;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
 
@@ -161,6 +169,7 @@ void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
 
 #define DST(x, y) dst[(x) + (y) * BPS]
 
+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
   const uint8_t* top = dst - BPS;
   const uint8_t* const clip0 = VP8kclip1 - top[-1];
@@ -174,21 +183,21 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
     dst += BPS;
   }
 }
-static void TM4(uint8_t* dst)   { TrueMotion(dst, 4); }
-static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
-static void TM16(uint8_t* dst)  { TrueMotion(dst, 16); }
+static void TM4_C(uint8_t* dst)   { TrueMotion(dst, 4); }
+static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
+static void TM16_C(uint8_t* dst)  { TrueMotion(dst, 16); }
 
 //------------------------------------------------------------------------------
 // 16x16
 
-static void VE16(uint8_t* dst) {     // vertical
+static void VE16_C(uint8_t* dst) {     // vertical
   int j;
   for (j = 0; j < 16; ++j) {
     memcpy(dst + j * BPS, dst - BPS, 16);
   }
 }
 
-static void HE16(uint8_t* dst) {     // horizontal
+static void HE16_C(uint8_t* dst) {     // horizontal
   int j;
   for (j = 16; j > 0; --j) {
     memset(dst, dst[-1], 16);
@@ -203,7 +212,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
   }
 }
 
-static void DC16(uint8_t* dst) {    // DC
+static void DC16_C(uint8_t* dst) {    // DC
   int DC = 16;
   int j;
   for (j = 0; j < 16; ++j) {
@@ -212,7 +221,7 @@ static void DC16(uint8_t* dst) {    // DC
   Put16(DC >> 5, dst);
 }
 
-static void DC16NoTop(uint8_t* dst) {   // DC with top samples not available
+static void DC16NoTop_C(uint8_t* dst) {   // DC with top samples not available
   int DC = 8;
   int j;
   for (j = 0; j < 16; ++j) {
@@ -221,7 +230,7 @@ static void DC16NoTop(uint8_t* dst) {   // DC with top samples not available
   Put16(DC >> 4, dst);
 }
 
-static void DC16NoLeft(uint8_t* dst) {  // DC with left samples not available
+static void DC16NoLeft_C(uint8_t* dst) {  // DC with left samples not available
   int DC = 8;
   int i;
   for (i = 0; i < 16; ++i) {
@@ -230,9 +239,10 @@ static void DC16NoLeft(uint8_t* dst) {  // DC with left samples not available
   Put16(DC >> 4, dst);
 }
 
-static void DC16NoTopLeft(uint8_t* dst) {  // DC with no top and left samples
+static void DC16NoTopLeft_C(uint8_t* dst) {  // DC with no top and left samples
   Put16(0x80, dst);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
 
@@ -242,7 +252,8 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
 #define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
 #define AVG2(a, b) (((a) + (b) + 1) >> 1)
 
-static void VE4(uint8_t* dst) {    // vertical
+#if !WEBP_NEON_OMIT_C_CODE
+static void VE4_C(uint8_t* dst) {    // vertical
   const uint8_t* top = dst - BPS;
   const uint8_t vals[4] = {
     AVG3(top[-1], top[0], top[1]),
@@ -255,8 +266,9 @@ static void VE4(uint8_t* dst) {    // vertical
     memcpy(dst + i * BPS, vals, sizeof(vals));
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void HE4(uint8_t* dst) {    // horizontal
+static void HE4_C(uint8_t* dst) {    // horizontal
   const int A = dst[-1 - BPS];
   const int B = dst[-1];
   const int C = dst[-1 + BPS];
@@ -268,7 +280,8 @@ static void HE4(uint8_t* dst) {    // horizontal
   WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
 }
 
-static void DC4(uint8_t* dst) {   // DC
+#if !WEBP_NEON_OMIT_C_CODE
+static void DC4_C(uint8_t* dst) {   // DC
   uint32_t dc = 4;
   int i;
   for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
@@ -276,7 +289,7 @@ static void DC4(uint8_t* dst) {   // DC
   for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
 }
 
-static void RD4(uint8_t* dst) {   // Down-right
+static void RD4_C(uint8_t* dst) {   // Down-right
   const int I = dst[-1 + 0 * BPS];
   const int J = dst[-1 + 1 * BPS];
   const int K = dst[-1 + 2 * BPS];
@@ -295,7 +308,7 @@ static void RD4(uint8_t* dst) {   // Down-right
                                       DST(3, 0) = AVG3(D, C, B);
 }
 
-static void LD4(uint8_t* dst) {   // Down-Left
+static void LD4_C(uint8_t* dst) {   // Down-Left
   const int A = dst[0 - BPS];
   const int B = dst[1 - BPS];
   const int C = dst[2 - BPS];
@@ -312,8 +325,9 @@ static void LD4(uint8_t* dst) {   // Down-Left
                           DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
                                       DST(3, 3) = AVG3(G, H, H);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void VR4(uint8_t* dst) {   // Vertical-Right
+static void VR4_C(uint8_t* dst) {   // Vertical-Right
   const int I = dst[-1 + 0 * BPS];
   const int J = dst[-1 + 1 * BPS];
   const int K = dst[-1 + 2 * BPS];
@@ -335,7 +349,7 @@ static void VR4(uint8_t* dst) {   // Vertical-Right
   DST(3, 1) =             AVG3(B, C, D);
 }
 
-static void VL4(uint8_t* dst) {   // Vertical-Left
+static void VL4_C(uint8_t* dst) {   // Vertical-Left
   const int A = dst[0 - BPS];
   const int B = dst[1 - BPS];
   const int C = dst[2 - BPS];
@@ -357,7 +371,7 @@ static void VL4(uint8_t* dst) {   // Vertical-Left
               DST(3, 3) = AVG3(F, G, H);
 }
 
-static void HU4(uint8_t* dst) {   // Horizontal-Up
+static void HU4_C(uint8_t* dst) {   // Horizontal-Up
   const int I = dst[-1 + 0 * BPS];
   const int J = dst[-1 + 1 * BPS];
   const int K = dst[-1 + 2 * BPS];
@@ -372,7 +386,7 @@ static void HU4(uint8_t* dst) {   // Horizontal-Up
     DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }
 
-static void HD4(uint8_t* dst) {  // Horizontal-Down
+static void HD4_C(uint8_t* dst) {  // Horizontal-Down
   const int I = dst[-1 + 0 * BPS];
   const int J = dst[-1 + 1 * BPS];
   const int K = dst[-1 + 2 * BPS];
@@ -404,14 +418,15 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES];
 //------------------------------------------------------------------------------
 // Chroma
 
-static void VE8uv(uint8_t* dst) {    // vertical
+#if !WEBP_NEON_OMIT_C_CODE
+static void VE8uv_C(uint8_t* dst) {    // vertical
   int j;
   for (j = 0; j < 8; ++j) {
     memcpy(dst + j * BPS, dst - BPS, 8);
   }
 }
 
-static void HE8uv(uint8_t* dst) {    // horizontal
+static void HE8uv_C(uint8_t* dst) {    // horizontal
   int j;
   for (j = 0; j < 8; ++j) {
     memset(dst, dst[-1], 8);
@@ -427,7 +442,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
   }
 }
 
-static void DC8uv(uint8_t* dst) {     // DC
+static void DC8uv_C(uint8_t* dst) {     // DC
   int dc0 = 8;
   int i;
   for (i = 0; i < 8; ++i) {
@@ -436,7 +451,7 @@ static void DC8uv(uint8_t* dst) {     // DC
   Put8x8uv(dc0 >> 4, dst);
 }
 
-static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
+static void DC8uvNoLeft_C(uint8_t* dst) {   // DC with no left samples
   int dc0 = 4;
   int i;
   for (i = 0; i < 8; ++i) {
@@ -445,7 +460,7 @@ static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
   Put8x8uv(dc0 >> 3, dst);
 }
 
-static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
+static void DC8uvNoTop_C(uint8_t* dst) {  // DC with no top samples
   int dc0 = 4;
   int i;
   for (i = 0; i < 8; ++i) {
@@ -454,17 +469,19 @@ static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
   Put8x8uv(dc0 >> 3, dst);
 }
 
-static void DC8uvNoTopLeft(uint8_t* dst) {    // DC with nothing
+static void DC8uvNoTopLeft_C(uint8_t* dst) {    // DC with nothing
   Put8x8uv(0x80, dst);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
 
 //------------------------------------------------------------------------------
 // Edge filtering functions
 
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 // 4 pixels in, 2 pixels out
-static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
+static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
   const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
   const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];  // in [-893,892]
   const int a1 = VP8ksclip2[(a + 4) >> 3];            // in [-16,15]
@@ -474,7 +491,7 @@ static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
 }
 
 // 4 pixels in, 4 pixels out
-static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
+static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
   const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
   const int a = 3 * (q0 - p0);
   const int a1 = VP8ksclip2[(a + 4) >> 3];
@@ -487,7 +504,7 @@ static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
 }
 
 // 6 pixels in, 6 pixels out
-static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
+static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
   const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
   const int q0 = p[0], q1 = p[step], q2 = p[2*step];
   const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
@@ -503,18 +520,22 @@ static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
   p[ 2*step] = VP8kclip1[q2 - a3];
 }
 
-static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
+static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
   const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
   return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
-static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
+#if !WEBP_NEON_OMIT_C_CODE
+static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
   return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static WEBP_INLINE int needs_filter2(const uint8_t* p,
-                                     int step, int t, int it) {
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
+                                      int step, int t, int it) {
   const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
   const int p0 = p[-step], q0 = p[0];
   const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
@@ -523,140 +544,159 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
          VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
          VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
 //------------------------------------------------------------------------------
 // Simple In-loop filtering (Paragraph 15.2)
 
-static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
   int i;
   const int thresh2 = 2 * thresh + 1;
   for (i = 0; i < 16; ++i) {
-    if (needs_filter(p + i, stride, thresh2)) {
-      do_filter2(p + i, stride);
+    if (NeedsFilter_C(p + i, stride, thresh2)) {
+      DoFilter2_C(p + i, stride);
     }
   }
 }
 
-static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
   int i;
   const int thresh2 = 2 * thresh + 1;
   for (i = 0; i < 16; ++i) {
-    if (needs_filter(p + i * stride, 1, thresh2)) {
-      do_filter2(p + i * stride, 1);
+    if (NeedsFilter_C(p + i * stride, 1, thresh2)) {
+      DoFilter2_C(p + i * stride, 1);
     }
   }
 }
 
-static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
   int k;
   for (k = 3; k > 0; --k) {
     p += 4 * stride;
-    SimpleVFilter16(p, stride, thresh);
+    SimpleVFilter16_C(p, stride, thresh);
   }
 }
 
-static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
   int k;
   for (k = 3; k > 0; --k) {
     p += 4;
-    SimpleHFilter16(p, stride, thresh);
+    SimpleHFilter16_C(p, stride, thresh);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // Complex In-loop filtering (Paragraph 15.3)
 
-static WEBP_INLINE void FilterLoop26(uint8_t* p,
-                                     int hstride, int vstride, int size,
-                                     int thresh, int ithresh, int hev_thresh) {
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
+                                       int hstride, int vstride, int size,
+                                       int thresh, int ithresh,
+                                       int hev_thresh) {
   const int thresh2 = 2 * thresh + 1;
   while (size-- > 0) {
-    if (needs_filter2(p, hstride, thresh2, ithresh)) {
-      if (hev(p, hstride, hev_thresh)) {
-        do_filter2(p, hstride);
+    if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
+      if (Hev(p, hstride, hev_thresh)) {
+        DoFilter2_C(p, hstride);
       } else {
-        do_filter6(p, hstride);
+        DoFilter6_C(p, hstride);
       }
     }
     p += vstride;
   }
 }
 
-static WEBP_INLINE void FilterLoop24(uint8_t* p,
-                                     int hstride, int vstride, int size,
-                                     int thresh, int ithresh, int hev_thresh) {
+static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
+                                       int hstride, int vstride, int size,
+                                       int thresh, int ithresh,
+                                       int hev_thresh) {
   const int thresh2 = 2 * thresh + 1;
   while (size-- > 0) {
-    if (needs_filter2(p, hstride, thresh2, ithresh)) {
-      if (hev(p, hstride, hev_thresh)) {
-        do_filter2(p, hstride);
+    if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
+      if (Hev(p, hstride, hev_thresh)) {
+        DoFilter2_C(p, hstride);
       } else {
-        do_filter4(p, hstride);
+        DoFilter4_C(p, hstride);
       }
     }
     p += vstride;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
+#if !WEBP_NEON_OMIT_C_CODE
 // on macroblock edges
-static void VFilter16(uint8_t* p, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
-  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+static void VFilter16_C(uint8_t* p, int stride,
+                        int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
 }
 
-static void HFilter16(uint8_t* p, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
-  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+static void HFilter16_C(uint8_t* p, int stride,
+                        int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
 }
 
 // on three inner edges
-static void VFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i_C(uint8_t* p, int stride,
+                         int thresh, int ithresh, int hev_thresh) {
   int k;
   for (k = 3; k > 0; --k) {
     p += 4 * stride;
-    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+    FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void HFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+static void HFilter16i_C(uint8_t* p, int stride,
+                         int thresh, int ithresh, int hev_thresh) {
   int k;
   for (k = 3; k > 0; --k) {
     p += 4;
-    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+    FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
+#if !WEBP_NEON_OMIT_C_CODE
 // 8-pixels wide variant, for chroma filtering
-static void VFilter8(uint8_t* u, uint8_t* v, int stride,
-                     int thresh, int ithresh, int hev_thresh) {
-  FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
-  FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
+static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void HFilter8(uint8_t* u, uint8_t* v, int stride,
-                     int thresh, int ithresh, int hev_thresh) {
-  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
-  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
-static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
-  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
-  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+#if !WEBP_NEON_OMIT_C_CODE
+static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
+                        int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
-  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
-  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
+                        int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
 //------------------------------------------------------------------------------
 
-static void DitherCombine8x8(const uint8_t* dither, uint8_t* dst,
-                             int dst_stride) {
+static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
+                               int dst_stride) {
   int i, j;
   for (j = 0; j < 8; ++j) {
     for (i = 0; i < 8; ++i) {
@@ -709,54 +749,66 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
 
   VP8InitClipTables();
 
-  VP8TransformWHT = TransformWHT;
-  VP8Transform = TransformTwo;
-  VP8TransformUV = TransformUV;
-  VP8TransformDC = TransformDC;
-  VP8TransformDCUV = TransformDCUV;
-  VP8TransformAC3 = TransformAC3;
-
-  VP8VFilter16 = VFilter16;
-  VP8HFilter16 = HFilter16;
-  VP8VFilter8 = VFilter8;
-  VP8HFilter8 = HFilter8;
-  VP8VFilter16i = VFilter16i;
-  VP8HFilter16i = HFilter16i;
-  VP8VFilter8i = VFilter8i;
-  VP8HFilter8i = HFilter8i;
-  VP8SimpleVFilter16 = SimpleVFilter16;
-  VP8SimpleHFilter16 = SimpleHFilter16;
-  VP8SimpleVFilter16i = SimpleVFilter16i;
-  VP8SimpleHFilter16i = SimpleHFilter16i;
-
-  VP8PredLuma4[0] = DC4;
-  VP8PredLuma4[1] = TM4;
-  VP8PredLuma4[2] = VE4;
-  VP8PredLuma4[3] = HE4;
-  VP8PredLuma4[4] = RD4;
-  VP8PredLuma4[5] = VR4;
-  VP8PredLuma4[6] = LD4;
-  VP8PredLuma4[7] = VL4;
-  VP8PredLuma4[8] = HD4;
-  VP8PredLuma4[9] = HU4;
-
-  VP8PredLuma16[0] = DC16;
-  VP8PredLuma16[1] = TM16;
-  VP8PredLuma16[2] = VE16;
-  VP8PredLuma16[3] = HE16;
-  VP8PredLuma16[4] = DC16NoTop;
-  VP8PredLuma16[5] = DC16NoLeft;
-  VP8PredLuma16[6] = DC16NoTopLeft;
-
-  VP8PredChroma8[0] = DC8uv;
-  VP8PredChroma8[1] = TM8uv;
-  VP8PredChroma8[2] = VE8uv;
-  VP8PredChroma8[3] = HE8uv;
-  VP8PredChroma8[4] = DC8uvNoTop;
-  VP8PredChroma8[5] = DC8uvNoLeft;
-  VP8PredChroma8[6] = DC8uvNoTopLeft;
-
-  VP8DitherCombine8x8 = DitherCombine8x8;
+#if !WEBP_NEON_OMIT_C_CODE
+  VP8TransformWHT = TransformWHT_C;
+  VP8Transform = TransformTwo_C;
+  VP8TransformDC = TransformDC_C;
+  VP8TransformAC3 = TransformAC3_C;
+#endif
+  VP8TransformUV = TransformUV_C;
+  VP8TransformDCUV = TransformDCUV_C;
+
+#if !WEBP_NEON_OMIT_C_CODE
+  VP8VFilter16 = VFilter16_C;
+  VP8VFilter16i = VFilter16i_C;
+  VP8HFilter16 = HFilter16_C;
+  VP8VFilter8 = VFilter8_C;
+  VP8VFilter8i = VFilter8i_C;
+  VP8SimpleVFilter16 = SimpleVFilter16_C;
+  VP8SimpleHFilter16 = SimpleHFilter16_C;
+  VP8SimpleVFilter16i = SimpleVFilter16i_C;
+  VP8SimpleHFilter16i = SimpleHFilter16i_C;
+#endif
+
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+  VP8HFilter16i = HFilter16i_C;
+  VP8HFilter8 = HFilter8_C;
+  VP8HFilter8i = HFilter8i_C;
+#endif
+
+#if !WEBP_NEON_OMIT_C_CODE
+  VP8PredLuma4[0] = DC4_C;
+  VP8PredLuma4[1] = TM4_C;
+  VP8PredLuma4[2] = VE4_C;
+  VP8PredLuma4[4] = RD4_C;
+  VP8PredLuma4[6] = LD4_C;
+#endif
+
+  VP8PredLuma4[3] = HE4_C;
+  VP8PredLuma4[5] = VR4_C;
+  VP8PredLuma4[7] = VL4_C;
+  VP8PredLuma4[8] = HD4_C;
+  VP8PredLuma4[9] = HU4_C;
+
+#if !WEBP_NEON_OMIT_C_CODE
+  VP8PredLuma16[0] = DC16_C;
+  VP8PredLuma16[1] = TM16_C;
+  VP8PredLuma16[2] = VE16_C;
+  VP8PredLuma16[3] = HE16_C;
+  VP8PredLuma16[4] = DC16NoTop_C;
+  VP8PredLuma16[5] = DC16NoLeft_C;
+  VP8PredLuma16[6] = DC16NoTopLeft_C;
+
+  VP8PredChroma8[0] = DC8uv_C;
+  VP8PredChroma8[1] = TM8uv_C;
+  VP8PredChroma8[2] = VE8uv_C;
+  VP8PredChroma8[3] = HE8uv_C;
+  VP8PredChroma8[4] = DC8uvNoTop_C;
+  VP8PredChroma8[5] = DC8uvNoLeft_C;
+  VP8PredChroma8[6] = DC8uvNoTopLeft_C;
+#endif
+
+  VP8DitherCombine8x8 = DitherCombine8x8_C;
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
@@ -770,11 +822,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
 #endif
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8DspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS32)
     if (VP8GetCPUInfo(kMIPS32)) {
       VP8DspInitMIPS32();
@@ -791,5 +838,57 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8DspInitNEON();
+  }
+#endif
+
+  assert(VP8TransformWHT != NULL);
+  assert(VP8Transform != NULL);
+  assert(VP8TransformDC != NULL);
+  assert(VP8TransformAC3 != NULL);
+  assert(VP8TransformUV != NULL);
+  assert(VP8TransformDCUV != NULL);
+  assert(VP8VFilter16 != NULL);
+  assert(VP8HFilter16 != NULL);
+  assert(VP8VFilter8 != NULL);
+  assert(VP8HFilter8 != NULL);
+  assert(VP8VFilter16i != NULL);
+  assert(VP8HFilter16i != NULL);
+  assert(VP8VFilter8i != NULL);
+  assert(VP8HFilter8i != NULL);
+  assert(VP8SimpleVFilter16 != NULL);
+  assert(VP8SimpleHFilter16 != NULL);
+  assert(VP8SimpleVFilter16i != NULL);
+  assert(VP8SimpleHFilter16i != NULL);
+  assert(VP8PredLuma4[0] != NULL);
+  assert(VP8PredLuma4[1] != NULL);
+  assert(VP8PredLuma4[2] != NULL);
+  assert(VP8PredLuma4[3] != NULL);
+  assert(VP8PredLuma4[4] != NULL);
+  assert(VP8PredLuma4[5] != NULL);
+  assert(VP8PredLuma4[6] != NULL);
+  assert(VP8PredLuma4[7] != NULL);
+  assert(VP8PredLuma4[8] != NULL);
+  assert(VP8PredLuma4[9] != NULL);
+  assert(VP8PredLuma16[0] != NULL);
+  assert(VP8PredLuma16[1] != NULL);
+  assert(VP8PredLuma16[2] != NULL);
+  assert(VP8PredLuma16[3] != NULL);
+  assert(VP8PredLuma16[4] != NULL);
+  assert(VP8PredLuma16[5] != NULL);
+  assert(VP8PredLuma16[6] != NULL);
+  assert(VP8PredChroma8[0] != NULL);
+  assert(VP8PredChroma8[1] != NULL);
+  assert(VP8PredChroma8[2] != NULL);
+  assert(VP8PredChroma8[3] != NULL);
+  assert(VP8PredChroma8[4] != NULL);
+  assert(VP8PredChroma8[5] != NULL);
+  assert(VP8PredChroma8[6] != NULL);
+  assert(VP8DitherCombine8x8 != NULL);
+
   dec_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/thirdparty/libwebp/dsp/dec_clip_tables.c b/thirdparty/libwebp/src/dsp/dec_clip_tables.c
index 74ba34c0bb..427b74f776 100644
--- a/thirdparty/libwebp/dsp/dec_clip_tables.c
+++ b/thirdparty/libwebp/src/dsp/dec_clip_tables.c
@@ -11,11 +11,14 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
-#define USE_STATIC_TABLES     // undefine to have run-time table initialization
+// define to 0 to have run-time table initialization
+#if !defined(USE_STATIC_TABLES)
+#define USE_STATIC_TABLES 1   // ALTERNATE_CODE
+#endif
 
-#ifdef USE_STATIC_TABLES
+#if (USE_STATIC_TABLES == 1)
 
 static const uint8_t abs0[255 + 255 + 1] = {
   0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
@@ -337,7 +340,7 @@ static uint8_t clip1[255 + 511 + 1];
 // and make sure it's set to true _last_ (so as to be thread-safe)
 static volatile int tables_ok = 0;
 
-#endif
+#endif    // USE_STATIC_TABLES
 
 const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
 const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
@@ -345,7 +348,7 @@ const uint8_t* const VP8kclip1 = &clip1[255];
 const uint8_t* const VP8kabs0 = &abs0[255];
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
-#if !defined(USE_STATIC_TABLES)
+#if (USE_STATIC_TABLES == 0)
   int i;
   if (!tables_ok) {
     for (i = -255; i <= 255; ++i) {
diff --git a/thirdparty/libwebp/dsp/dec_mips32.c b/thirdparty/libwebp/src/dsp/dec_mips32.c
index 4e9ef42605..e4e70966d2 100644
--- a/thirdparty/libwebp/dsp/dec_mips32.c
+++ b/thirdparty/libwebp/src/dsp/dec_mips32.c
@@ -12,11 +12,11 @@
 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS32)
 
-#include "./mips_macro.h"
+#include "src/dsp/mips_macro.h"
 
 static const int kC1 = 20091 + (1 << 16);
 static const int kC2 = 35468;
diff --git a/thirdparty/libwebp/dsp/dec_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/dec_mips_dsp_r2.c
index db5c657228..b0936bc46e 100644
--- a/thirdparty/libwebp/dsp/dec_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/dec_mips_dsp_r2.c
@@ -12,11 +12,11 @@
 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS_DSP_R2)
 
-#include "./mips_macro.h"
+#include "src/dsp/mips_macro.h"
 
 static const int kC1 = 20091 + (1 << 16);
 static const int kC2 = 35468;
diff --git a/thirdparty/libwebp/dsp/dec_msa.c b/thirdparty/libwebp/src/dsp/dec_msa.c
index 8d9c98c3cf..8090622b7b 100644
--- a/thirdparty/libwebp/dsp/dec_msa.c
+++ b/thirdparty/libwebp/src/dsp/dec_msa.c
@@ -12,11 +12,11 @@
 // Author(s):  Prashant Patil   (prashant.patil@imgtec.com)
 
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MSA)
 
-#include "./msa_macro.h"
+#include "src/dsp/msa_macro.h"
 
 //------------------------------------------------------------------------------
 // Transforms
@@ -222,6 +222,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
   const v16i8 cnst4b = __msa_ldi_b(4);                        \
   const v16i8 cnst3b = __msa_ldi_b(3);                        \
   const v8i16 cnst9h = __msa_ldi_h(9);                        \
+  const v8i16 cnst63h = __msa_ldi_h(63);                      \
                                                               \
   FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m);         \
   filt = __msa_subs_s_b(p1_m, q1_m);                          \
@@ -241,9 +242,9 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
   ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l);               \
   /* update q2/p2 */                                          \
   temp0 = filt_r * cnst9h;                                    \
-  temp1 = ADDVI_H(temp0, 63);                                 \
+  temp1 = temp0 + cnst63h;                                    \
   temp2 = filt_l * cnst9h;                                    \
-  temp3 = ADDVI_H(temp2, 63);                                 \
+  temp3 = temp2 + cnst63h;                                    \
   FILT2(q2_m, p2_m, q2, p2);                                  \
   /* update q1/p1 */                                          \
   temp1 = temp1 + temp0;                                      \
@@ -708,7 +709,7 @@ static void VE4(uint8_t* dst) {    // vertical
   const uint32_t val0 = LW(ptop + 0);
   const uint32_t val1 = LW(ptop + 4);
   uint32_t out;
-  v16u8 A, B, C, AC, B2, R;
+  v16u8 A = { 0 }, B, C, AC, B2, R;
 
   INSERT_W2_UB(val0, val1, A);
   B = SLDI_UB(A, A, 1);
@@ -725,7 +726,7 @@ static void RD4(uint8_t* dst) {   // Down-right
   uint32_t val0 = LW(ptop + 0);
   uint32_t val1 = LW(ptop + 4);
   uint32_t val2, val3;
-  v16u8 A, B, C, AC, B2, R, A1;
+  v16u8 A, B, C, AC, B2, R, A1 = { 0 };
 
   INSERT_W2_UB(val0, val1, A1);
   A = SLDI_UB(A1, A1, 12);
@@ -753,7 +754,7 @@ static void LD4(uint8_t* dst) {   // Down-Left
   uint32_t val0 = LW(ptop + 0);
   uint32_t val1 = LW(ptop + 4);
   uint32_t val2, val3;
-  v16u8 A, B, C, AC, B2, R;
+  v16u8 A = { 0 }, B, C, AC, B2, R;
 
   INSERT_W2_UB(val0, val1, A);
   B = SLDI_UB(A, A, 1);
diff --git a/thirdparty/libwebp/dsp/dec_neon.c b/thirdparty/libwebp/src/dsp/dec_neon.c
index 34796cf4a2..ffa697fcf9 100644
--- a/thirdparty/libwebp/dsp/dec_neon.c
+++ b/thirdparty/libwebp/src/dsp/dec_neon.c
@@ -12,43 +12,23 @@
 // Authors: Somnath Banerjee (somnath@google.com)
 //          Johann Koenig (johannkoenig@google.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_NEON)
 
-#include "./neon.h"
-#include "../dec/vp8i_dec.h"
+#include "src/dsp/neon.h"
+#include "src/dec/vp8i_dec.h"
 
 //------------------------------------------------------------------------------
 // NxM Loading functions
 
-// Load/Store vertical edge
-#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride)                                \
-  "vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \
-  "vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \
-  "vld4.8 {" #c1 "[2]," #c2 "[2]," #c3 "[2]," #c4 "[2]}," #b1 "," #stride "\n" \
-  "vld4.8 {" #c1 "[3]," #c2 "[3]," #c3 "[3]," #c4 "[3]}," #b2 "," #stride "\n" \
-  "vld4.8 {" #c1 "[4]," #c2 "[4]," #c3 "[4]," #c4 "[4]}," #b1 "," #stride "\n" \
-  "vld4.8 {" #c1 "[5]," #c2 "[5]," #c3 "[5]," #c4 "[5]}," #b2 "," #stride "\n" \
-  "vld4.8 {" #c1 "[6]," #c2 "[6]," #c3 "[6]," #c4 "[6]}," #b1 "," #stride "\n" \
-  "vld4.8 {" #c1 "[7]," #c2 "[7]," #c3 "[7]," #c4 "[7]}," #b2 "," #stride "\n"
-
-#define STORE8x2(c1, c2, p, stride)                                            \
-  "vst2.8   {" #c1 "[0], " #c2 "[0]}," #p "," #stride " \n"                    \
-  "vst2.8   {" #c1 "[1], " #c2 "[1]}," #p "," #stride " \n"                    \
-  "vst2.8   {" #c1 "[2], " #c2 "[2]}," #p "," #stride " \n"                    \
-  "vst2.8   {" #c1 "[3], " #c2 "[3]}," #p "," #stride " \n"                    \
-  "vst2.8   {" #c1 "[4], " #c2 "[4]}," #p "," #stride " \n"                    \
-  "vst2.8   {" #c1 "[5], " #c2 "[5]}," #p "," #stride " \n"                    \
-  "vst2.8   {" #c1 "[6], " #c2 "[6]}," #p "," #stride " \n"                    \
-  "vst2.8   {" #c1 "[7], " #c2 "[7]}," #p "," #stride " \n"
-
 #if !defined(WORK_AROUND_GCC)
 
 // This intrinsics version makes gcc-4.6.3 crash during Load4x??() compilation
 // (register alloc, probably). The variants somewhat mitigate the problem, but
 // not quite. HFilter16i() remains problematic.
-static WEBP_INLINE uint8x8x4_t Load4x8(const uint8_t* const src, int stride) {
+static WEBP_INLINE uint8x8x4_t Load4x8_NEON(const uint8_t* const src,
+                                            int stride) {
   const uint8x8_t zero = vdup_n_u8(0);
   uint8x8x4_t out;
   INIT_VECTOR4(out, zero, zero, zero, zero);
@@ -63,13 +43,15 @@ static WEBP_INLINE uint8x8x4_t Load4x8(const uint8_t* const src, int stride) {
   return out;
 }
 
-static WEBP_INLINE void Load4x16(const uint8_t* const src, int stride,
-                                 uint8x16_t* const p1, uint8x16_t* const p0,
-                                 uint8x16_t* const q0, uint8x16_t* const q1) {
+static WEBP_INLINE void Load4x16_NEON(const uint8_t* const src, int stride,
+                                      uint8x16_t* const p1,
+                                      uint8x16_t* const p0,
+                                      uint8x16_t* const q0,
+                                      uint8x16_t* const q1) {
   // row0 = p1[0..7]|p0[0..7]|q0[0..7]|q1[0..7]
   // row8 = p1[8..15]|p0[8..15]|q0[8..15]|q1[8..15]
-  const uint8x8x4_t row0 = Load4x8(src - 2 + 0 * stride, stride);
-  const uint8x8x4_t row8 = Load4x8(src - 2 + 8 * stride, stride);
+  const uint8x8x4_t row0 = Load4x8_NEON(src - 2 + 0 * stride, stride);
+  const uint8x8x4_t row8 = Load4x8_NEON(src - 2 + 8 * stride, stride);
   *p1 = vcombine_u8(row0.val[0], row8.val[0]);
   *p0 = vcombine_u8(row0.val[1], row8.val[1]);
   *q0 = vcombine_u8(row0.val[2], row8.val[2]);
@@ -83,9 +65,11 @@ static WEBP_INLINE void Load4x16(const uint8_t* const src, int stride,
   src += stride;                                                     \
 } while (0)
 
-static WEBP_INLINE void Load4x16(const uint8_t* src, int stride,
-                                 uint8x16_t* const p1, uint8x16_t* const p0,
-                                 uint8x16_t* const q0, uint8x16_t* const q1) {
+static WEBP_INLINE void Load4x16_NEON(const uint8_t* src, int stride,
+                                      uint8x16_t* const p1,
+                                      uint8x16_t* const p0,
+                                      uint8x16_t* const q0,
+                                      uint8x16_t* const q1) {
   const uint32x4_t zero = vdupq_n_u32(0);
   uint32x4x4_t in;
   INIT_VECTOR4(in, zero, zero, zero, zero);
@@ -126,40 +110,40 @@ static WEBP_INLINE void Load4x16(const uint8_t* src, int stride,
 
 #endif  // !WORK_AROUND_GCC
 
-static WEBP_INLINE void Load8x16(const uint8_t* const src, int stride,
-                                 uint8x16_t* const p3, uint8x16_t* const p2,
-                                 uint8x16_t* const p1, uint8x16_t* const p0,
-                                 uint8x16_t* const q0, uint8x16_t* const q1,
-                                 uint8x16_t* const q2, uint8x16_t* const q3) {
-  Load4x16(src - 2, stride, p3, p2, p1, p0);
-  Load4x16(src + 2, stride, q0, q1, q2, q3);
+static WEBP_INLINE void Load8x16_NEON(
+    const uint8_t* const src, int stride,
+    uint8x16_t* const p3, uint8x16_t* const p2, uint8x16_t* const p1,
+    uint8x16_t* const p0, uint8x16_t* const q0, uint8x16_t* const q1,
+    uint8x16_t* const q2, uint8x16_t* const q3) {
+  Load4x16_NEON(src - 2, stride, p3, p2, p1, p0);
+  Load4x16_NEON(src + 2, stride, q0, q1, q2, q3);
 }
 
-static WEBP_INLINE void Load16x4(const uint8_t* const src, int stride,
-                                 uint8x16_t* const p1, uint8x16_t* const p0,
-                                 uint8x16_t* const q0, uint8x16_t* const q1) {
+static WEBP_INLINE void Load16x4_NEON(const uint8_t* const src, int stride,
+                                      uint8x16_t* const p1,
+                                      uint8x16_t* const p0,
+                                      uint8x16_t* const q0,
+                                      uint8x16_t* const q1) {
   *p1 = vld1q_u8(src - 2 * stride);
   *p0 = vld1q_u8(src - 1 * stride);
   *q0 = vld1q_u8(src + 0 * stride);
   *q1 = vld1q_u8(src + 1 * stride);
 }
 
-static WEBP_INLINE void Load16x8(const uint8_t* const src, int stride,
-                                 uint8x16_t* const p3, uint8x16_t* const p2,
-                                 uint8x16_t* const p1, uint8x16_t* const p0,
-                                 uint8x16_t* const q0, uint8x16_t* const q1,
-                                 uint8x16_t* const q2, uint8x16_t* const q3) {
-  Load16x4(src - 2  * stride, stride, p3, p2, p1, p0);
-  Load16x4(src + 2  * stride, stride, q0, q1, q2, q3);
+static WEBP_INLINE void Load16x8_NEON(
+    const uint8_t* const src, int stride,
+    uint8x16_t* const p3, uint8x16_t* const p2, uint8x16_t* const p1,
+    uint8x16_t* const p0, uint8x16_t* const q0, uint8x16_t* const q1,
+    uint8x16_t* const q2, uint8x16_t* const q3) {
+  Load16x4_NEON(src - 2  * stride, stride, p3, p2, p1, p0);
+  Load16x4_NEON(src + 2  * stride, stride, q0, q1, q2, q3);
 }
 
-static WEBP_INLINE void Load8x8x2(const uint8_t* const u,
-                                  const uint8_t* const v,
-                                  int stride,
-                                  uint8x16_t* const p3, uint8x16_t* const p2,
-                                  uint8x16_t* const p1, uint8x16_t* const p0,
-                                  uint8x16_t* const q0, uint8x16_t* const q1,
-                                  uint8x16_t* const q2, uint8x16_t* const q3) {
+static WEBP_INLINE void Load8x8x2_NEON(
+    const uint8_t* const u, const uint8_t* const v, int stride,
+    uint8x16_t* const p3, uint8x16_t* const p2, uint8x16_t* const p1,
+    uint8x16_t* const p0, uint8x16_t* const q0, uint8x16_t* const q1,
+    uint8x16_t* const q2, uint8x16_t* const q3) {
   // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
   // and the v-samples on the higher half.
   *p3 = vcombine_u8(vld1_u8(u - 4 * stride), vld1_u8(v - 4 * stride));
@@ -177,13 +161,11 @@ static WEBP_INLINE void Load8x8x2(const uint8_t* const u,
 #define LOAD_UV_8(ROW) \
   vcombine_u8(vld1_u8(u - 4 + (ROW) * stride), vld1_u8(v - 4 + (ROW) * stride))
 
-static WEBP_INLINE void Load8x8x2T(const uint8_t* const u,
-                                   const uint8_t* const v,
-                                   int stride,
-                                   uint8x16_t* const p3, uint8x16_t* const p2,
-                                   uint8x16_t* const p1, uint8x16_t* const p0,
-                                   uint8x16_t* const q0, uint8x16_t* const q1,
-                                   uint8x16_t* const q2, uint8x16_t* const q3) {
+static WEBP_INLINE void Load8x8x2T_NEON(
+    const uint8_t* const u, const uint8_t* const v, int stride,
+    uint8x16_t* const p3, uint8x16_t* const p2, uint8x16_t* const p1,
+    uint8x16_t* const p0, uint8x16_t* const q0, uint8x16_t* const q1,
+    uint8x16_t* const q2, uint8x16_t* const q3) {
   // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
   // and the v-samples on the higher half.
   const uint8x16_t row0 = LOAD_UV_8(0);
@@ -238,8 +220,8 @@ static WEBP_INLINE void Load8x8x2T(const uint8_t* const u,
 
 #endif  // !WORK_AROUND_GCC
 
-static WEBP_INLINE void Store2x8(const uint8x8x2_t v,
-                                 uint8_t* const dst, int stride) {
+static WEBP_INLINE void Store2x8_NEON(const uint8x8x2_t v,
+                                      uint8_t* const dst, int stride) {
   vst2_lane_u8(dst + 0 * stride, v, 0);
   vst2_lane_u8(dst + 1 * stride, v, 1);
   vst2_lane_u8(dst + 2 * stride, v, 2);
@@ -250,20 +232,20 @@ static WEBP_INLINE void Store2x8(const uint8x8x2_t v,
   vst2_lane_u8(dst + 7 * stride, v, 7);
 }
 
-static WEBP_INLINE void Store2x16(const uint8x16_t p0, const uint8x16_t q0,
-                                  uint8_t* const dst, int stride) {
+static WEBP_INLINE void Store2x16_NEON(const uint8x16_t p0, const uint8x16_t q0,
+                                       uint8_t* const dst, int stride) {
   uint8x8x2_t lo, hi;
   lo.val[0] = vget_low_u8(p0);
   lo.val[1] = vget_low_u8(q0);
   hi.val[0] = vget_high_u8(p0);
   hi.val[1] = vget_high_u8(q0);
-  Store2x8(lo, dst - 1 + 0 * stride, stride);
-  Store2x8(hi, dst - 1 + 8 * stride, stride);
+  Store2x8_NEON(lo, dst - 1 + 0 * stride, stride);
+  Store2x8_NEON(hi, dst - 1 + 8 * stride, stride);
 }
 
 #if !defined(WORK_AROUND_GCC)
-static WEBP_INLINE void Store4x8(const uint8x8x4_t v,
-                                 uint8_t* const dst, int stride) {
+static WEBP_INLINE void Store4x8_NEON(const uint8x8x4_t v,
+                                      uint8_t* const dst, int stride) {
   vst4_lane_u8(dst + 0 * stride, v, 0);
   vst4_lane_u8(dst + 1 * stride, v, 1);
   vst4_lane_u8(dst + 2 * stride, v, 2);
@@ -274,9 +256,9 @@ static WEBP_INLINE void Store4x8(const uint8x8x4_t v,
   vst4_lane_u8(dst + 7 * stride, v, 7);
 }
 
-static WEBP_INLINE void Store4x16(const uint8x16_t p1, const uint8x16_t p0,
-                                  const uint8x16_t q0, const uint8x16_t q1,
-                                  uint8_t* const dst, int stride) {
+static WEBP_INLINE void Store4x16_NEON(const uint8x16_t p1, const uint8x16_t p0,
+                                       const uint8x16_t q0, const uint8x16_t q1,
+                                       uint8_t* const dst, int stride) {
   uint8x8x4_t lo, hi;
   INIT_VECTOR4(lo,
                vget_low_u8(p1), vget_low_u8(p0),
@@ -284,27 +266,28 @@ static WEBP_INLINE void Store4x16(const uint8x16_t p1, const uint8x16_t p0,
   INIT_VECTOR4(hi,
                vget_high_u8(p1), vget_high_u8(p0),
                vget_high_u8(q0), vget_high_u8(q1));
-  Store4x8(lo, dst - 2 + 0 * stride, stride);
-  Store4x8(hi, dst - 2 + 8 * stride, stride);
+  Store4x8_NEON(lo, dst - 2 + 0 * stride, stride);
+  Store4x8_NEON(hi, dst - 2 + 8 * stride, stride);
 }
 #endif  // !WORK_AROUND_GCC
 
-static WEBP_INLINE void Store16x2(const uint8x16_t p0, const uint8x16_t q0,
-                                  uint8_t* const dst, int stride) {
+static WEBP_INLINE void Store16x2_NEON(const uint8x16_t p0, const uint8x16_t q0,
+                                       uint8_t* const dst, int stride) {
   vst1q_u8(dst - stride, p0);
   vst1q_u8(dst, q0);
 }
 
-static WEBP_INLINE void Store16x4(const uint8x16_t p1, const uint8x16_t p0,
-                                  const uint8x16_t q0, const uint8x16_t q1,
-                                  uint8_t* const dst, int stride) {
-  Store16x2(p1, p0, dst - stride, stride);
-  Store16x2(q0, q1, dst + stride, stride);
+static WEBP_INLINE void Store16x4_NEON(const uint8x16_t p1, const uint8x16_t p0,
+                                       const uint8x16_t q0, const uint8x16_t q1,
+                                       uint8_t* const dst, int stride) {
+  Store16x2_NEON(p1, p0, dst - stride, stride);
+  Store16x2_NEON(q0, q1, dst + stride, stride);
 }
 
-static WEBP_INLINE void Store8x2x2(const uint8x16_t p0, const uint8x16_t q0,
-                                   uint8_t* const u, uint8_t* const v,
-                                   int stride) {
+static WEBP_INLINE void Store8x2x2_NEON(const uint8x16_t p0,
+                                        const uint8x16_t q0,
+                                        uint8_t* const u, uint8_t* const v,
+                                        int stride) {
   // p0 and q0 contain the u+v samples packed in low/high halves.
   vst1_u8(u - stride, vget_low_u8(p0));
   vst1_u8(u,          vget_low_u8(q0));
@@ -312,13 +295,15 @@ static WEBP_INLINE void Store8x2x2(const uint8x16_t p0, const uint8x16_t q0,
   vst1_u8(v,          vget_high_u8(q0));
 }
 
-static WEBP_INLINE void Store8x4x2(const uint8x16_t p1, const uint8x16_t p0,
-                                   const uint8x16_t q0, const uint8x16_t q1,
-                                   uint8_t* const u, uint8_t* const v,
-                                   int stride) {
+static WEBP_INLINE void Store8x4x2_NEON(const uint8x16_t p1,
+                                        const uint8x16_t p0,
+                                        const uint8x16_t q0,
+                                        const uint8x16_t q1,
+                                        uint8_t* const u, uint8_t* const v,
+                                        int stride) {
   // The p1...q1 registers contain the u+v samples packed in low/high halves.
-  Store8x2x2(p1, p0, u - stride, v - stride, stride);
-  Store8x2x2(q0, q1, u + stride, v + stride, stride);
+  Store8x2x2_NEON(p1, p0, u - stride, v - stride, stride);
+  Store8x2x2_NEON(q0, q1, u + stride, v + stride, stride);
 }
 
 #if !defined(WORK_AROUND_GCC)
@@ -329,11 +314,10 @@ static WEBP_INLINE void Store8x4x2(const uint8x16_t p1, const uint8x16_t p0,
   (DST) += stride;                                \
 } while (0)
 
-static WEBP_INLINE void Store6x8x2(const uint8x16_t p2, const uint8x16_t p1,
-                                   const uint8x16_t p0, const uint8x16_t q0,
-                                   const uint8x16_t q1, const uint8x16_t q2,
-                                   uint8_t* u, uint8_t* v,
-                                   int stride) {
+static WEBP_INLINE void Store6x8x2_NEON(
+    const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
+    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
+    uint8_t* u, uint8_t* v, int stride) {
   uint8x8x3_t u0, u1, v0, v1;
   INIT_VECTOR3(u0, vget_low_u8(p2), vget_low_u8(p1), vget_low_u8(p0));
   INIT_VECTOR3(u1, vget_low_u8(q0), vget_low_u8(q1), vget_low_u8(q2));
@@ -358,10 +342,12 @@ static WEBP_INLINE void Store6x8x2(const uint8x16_t p2, const uint8x16_t p1,
 }
 #undef STORE6_LANE
 
-static WEBP_INLINE void Store4x8x2(const uint8x16_t p1, const uint8x16_t p0,
-                                   const uint8x16_t q0, const uint8x16_t q1,
-                                   uint8_t* const u, uint8_t* const v,
-                                   int stride) {
+static WEBP_INLINE void Store4x8x2_NEON(const uint8x16_t p1,
+                                        const uint8x16_t p0,
+                                        const uint8x16_t q0,
+                                        const uint8x16_t q1,
+                                        uint8_t* const u, uint8_t* const v,
+                                        int stride) {
   uint8x8x4_t u0, v0;
   INIT_VECTOR4(u0,
                vget_low_u8(p1), vget_low_u8(p0),
@@ -390,15 +376,15 @@ static WEBP_INLINE void Store4x8x2(const uint8x16_t p1, const uint8x16_t p0,
 #endif  // !WORK_AROUND_GCC
 
 // Zero extend 'v' to an int16x8_t.
-static WEBP_INLINE int16x8_t ConvertU8ToS16(uint8x8_t v) {
+static WEBP_INLINE int16x8_t ConvertU8ToS16_NEON(uint8x8_t v) {
   return vreinterpretq_s16_u16(vmovl_u8(v));
 }
 
 // Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
 // to the corresponding rows of 'dst'.
-static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
-                                            const int16x8_t dst01,
-                                            const int16x8_t dst23) {
+static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
+                                                 const int16x8_t dst01,
+                                                 const int16x8_t dst23) {
   // Unsigned saturate to 8b.
   const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
   const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
@@ -410,8 +396,9 @@ static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
   vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
 }
 
-static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
-                               uint8_t* const dst) {
+static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
+                                    const int16x8_t row23,
+                                    uint8_t* const dst) {
   uint32x2_t dst01 = vdup_n_u32(0);
   uint32x2_t dst23 = vdup_n_u32(0);
 
@@ -423,23 +410,23 @@ static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
 
   {
     // Convert to 16b.
-    const int16x8_t dst01_s16 = ConvertU8ToS16(vreinterpret_u8_u32(dst01));
-    const int16x8_t dst23_s16 = ConvertU8ToS16(vreinterpret_u8_u32(dst23));
+    const int16x8_t dst01_s16 = ConvertU8ToS16_NEON(vreinterpret_u8_u32(dst01));
+    const int16x8_t dst23_s16 = ConvertU8ToS16_NEON(vreinterpret_u8_u32(dst23));
 
     // Descale with rounding.
     const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3);
     const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3);
     // Add the inverse transform.
-    SaturateAndStore4x4(dst, out01, out23);
+    SaturateAndStore4x4_NEON(dst, out01, out23);
   }
 }
 
 //-----------------------------------------------------------------------------
 // Simple In-loop filtering (Paragraph 15.2)
 
-static uint8x16_t NeedsFilter(const uint8x16_t p1, const uint8x16_t p0,
-                              const uint8x16_t q0, const uint8x16_t q1,
-                              int thresh) {
+static uint8x16_t NeedsFilter_NEON(const uint8x16_t p1, const uint8x16_t p0,
+                                   const uint8x16_t q0, const uint8x16_t q1,
+                                   int thresh) {
   const uint8x16_t thresh_v = vdupq_n_u8((uint8_t)thresh);
   const uint8x16_t a_p0_q0 = vabdq_u8(p0, q0);               // abs(p0-q0)
   const uint8x16_t a_p1_q1 = vabdq_u8(p1, q1);               // abs(p1-q1)
@@ -450,18 +437,18 @@ static uint8x16_t NeedsFilter(const uint8x16_t p1, const uint8x16_t p0,
   return mask;
 }
 
-static int8x16_t FlipSign(const uint8x16_t v) {
+static int8x16_t FlipSign_NEON(const uint8x16_t v) {
   const uint8x16_t sign_bit = vdupq_n_u8(0x80);
   return vreinterpretq_s8_u8(veorq_u8(v, sign_bit));
 }
 
-static uint8x16_t FlipSignBack(const int8x16_t v) {
+static uint8x16_t FlipSignBack_NEON(const int8x16_t v) {
   const int8x16_t sign_bit = vdupq_n_s8(0x80);
   return vreinterpretq_u8_s8(veorq_s8(v, sign_bit));
 }
 
-static int8x16_t GetBaseDelta(const int8x16_t p1, const int8x16_t p0,
-                              const int8x16_t q0, const int8x16_t q1) {
+static int8x16_t GetBaseDelta_NEON(const int8x16_t p1, const int8x16_t p0,
+                                   const int8x16_t q0, const int8x16_t q1) {
   const int8x16_t q0_p0 = vqsubq_s8(q0, p0);      // (q0-p0)
   const int8x16_t p1_q1 = vqsubq_s8(p1, q1);      // (p1-q1)
   const int8x16_t s1 = vqaddq_s8(p1_q1, q0_p0);   // (p1-q1) + 1 * (q0 - p0)
@@ -470,7 +457,7 @@ static int8x16_t GetBaseDelta(const int8x16_t p1, const int8x16_t p0,
   return s3;
 }
 
-static int8x16_t GetBaseDelta0(const int8x16_t p0, const int8x16_t q0) {
+static int8x16_t GetBaseDelta0_NEON(const int8x16_t p0, const int8x16_t q0) {
   const int8x16_t q0_p0 = vqsubq_s8(q0, p0);      // (q0-p0)
   const int8x16_t s1 = vqaddq_s8(q0_p0, q0_p0);   // 2 * (q0 - p0)
   const int8x16_t s2 = vqaddq_s8(q0_p0, s1);      // 3 * (q0 - p0)
@@ -479,9 +466,10 @@ static int8x16_t GetBaseDelta0(const int8x16_t p0, const int8x16_t q0) {
 
 //------------------------------------------------------------------------------
 
-static void ApplyFilter2NoFlip(const int8x16_t p0s, const int8x16_t q0s,
-                               const int8x16_t delta,
-                               int8x16_t* const op0, int8x16_t* const oq0) {
+static void ApplyFilter2NoFlip_NEON(const int8x16_t p0s, const int8x16_t q0s,
+                                    const int8x16_t delta,
+                                    int8x16_t* const op0,
+                                    int8x16_t* const oq0) {
   const int8x16_t kCst3 = vdupq_n_s8(0x03);
   const int8x16_t kCst4 = vdupq_n_s8(0x04);
   const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3);
@@ -494,9 +482,9 @@ static void ApplyFilter2NoFlip(const int8x16_t p0s, const int8x16_t q0s,
 
 #if defined(WEBP_USE_INTRINSICS)
 
-static void ApplyFilter2(const int8x16_t p0s, const int8x16_t q0s,
-                         const int8x16_t delta,
-                         uint8x16_t* const op0, uint8x16_t* const oq0) {
+static void ApplyFilter2_NEON(const int8x16_t p0s, const int8x16_t q0s,
+                              const int8x16_t delta,
+                              uint8x16_t* const op0, uint8x16_t* const oq0) {
   const int8x16_t kCst3 = vdupq_n_s8(0x03);
   const int8x16_t kCst4 = vdupq_n_s8(0x04);
   const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3);
@@ -505,45 +493,66 @@ static void ApplyFilter2(const int8x16_t p0s, const int8x16_t q0s,
   const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3);
   const int8x16_t sp0 = vqaddq_s8(p0s, delta3);
   const int8x16_t sq0 = vqsubq_s8(q0s, delta4);
-  *op0 = FlipSignBack(sp0);
-  *oq0 = FlipSignBack(sq0);
-}
-
-static void DoFilter2(const uint8x16_t p1, const uint8x16_t p0,
-                      const uint8x16_t q0, const uint8x16_t q1,
-                      const uint8x16_t mask,
-                      uint8x16_t* const op0, uint8x16_t* const oq0) {
-  const int8x16_t p1s = FlipSign(p1);
-  const int8x16_t p0s = FlipSign(p0);
-  const int8x16_t q0s = FlipSign(q0);
-  const int8x16_t q1s = FlipSign(q1);
-  const int8x16_t delta0 = GetBaseDelta(p1s, p0s, q0s, q1s);
+  *op0 = FlipSignBack_NEON(sp0);
+  *oq0 = FlipSignBack_NEON(sq0);
+}
+
+static void DoFilter2_NEON(const uint8x16_t p1, const uint8x16_t p0,
+                           const uint8x16_t q0, const uint8x16_t q1,
+                           const uint8x16_t mask,
+                           uint8x16_t* const op0, uint8x16_t* const oq0) {
+  const int8x16_t p1s = FlipSign_NEON(p1);
+  const int8x16_t p0s = FlipSign_NEON(p0);
+  const int8x16_t q0s = FlipSign_NEON(q0);
+  const int8x16_t q1s = FlipSign_NEON(q1);
+  const int8x16_t delta0 = GetBaseDelta_NEON(p1s, p0s, q0s, q1s);
   const int8x16_t delta1 = vandq_s8(delta0, vreinterpretq_s8_u8(mask));
-  ApplyFilter2(p0s, q0s, delta1, op0, oq0);
+  ApplyFilter2_NEON(p0s, q0s, delta1, op0, oq0);
 }
 
-static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16_NEON(uint8_t* p, int stride, int thresh) {
   uint8x16_t p1, p0, q0, q1, op0, oq0;
-  Load16x4(p, stride, &p1, &p0, &q0, &q1);
+  Load16x4_NEON(p, stride, &p1, &p0, &q0, &q1);
   {
-    const uint8x16_t mask = NeedsFilter(p1, p0, q0, q1, thresh);
-    DoFilter2(p1, p0, q0, q1, mask, &op0, &oq0);
+    const uint8x16_t mask = NeedsFilter_NEON(p1, p0, q0, q1, thresh);
+    DoFilter2_NEON(p1, p0, q0, q1, mask, &op0, &oq0);
   }
-  Store16x2(op0, oq0, p, stride);
+  Store16x2_NEON(op0, oq0, p, stride);
 }
 
-static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
   uint8x16_t p1, p0, q0, q1, oq0, op0;
-  Load4x16(p, stride, &p1, &p0, &q0, &q1);
+  Load4x16_NEON(p, stride, &p1, &p0, &q0, &q1);
   {
-    const uint8x16_t mask = NeedsFilter(p1, p0, q0, q1, thresh);
-    DoFilter2(p1, p0, q0, q1, mask, &op0, &oq0);
+    const uint8x16_t mask = NeedsFilter_NEON(p1, p0, q0, q1, thresh);
+    DoFilter2_NEON(p1, p0, q0, q1, mask, &op0, &oq0);
   }
-  Store2x16(op0, oq0, p, stride);
+  Store2x16_NEON(op0, oq0, p, stride);
 }
 
 #else
 
+// Load/Store vertical edge
+#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride)                                \
+  "vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \
+  "vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \
+  "vld4.8 {" #c1 "[2]," #c2 "[2]," #c3 "[2]," #c4 "[2]}," #b1 "," #stride "\n" \
+  "vld4.8 {" #c1 "[3]," #c2 "[3]," #c3 "[3]," #c4 "[3]}," #b2 "," #stride "\n" \
+  "vld4.8 {" #c1 "[4]," #c2 "[4]," #c3 "[4]," #c4 "[4]}," #b1 "," #stride "\n" \
+  "vld4.8 {" #c1 "[5]," #c2 "[5]," #c3 "[5]," #c4 "[5]}," #b2 "," #stride "\n" \
+  "vld4.8 {" #c1 "[6]," #c2 "[6]," #c3 "[6]," #c4 "[6]}," #b1 "," #stride "\n" \
+  "vld4.8 {" #c1 "[7]," #c2 "[7]," #c3 "[7]," #c4 "[7]}," #b2 "," #stride "\n"
+
+#define STORE8x2(c1, c2, p, stride)                                            \
+  "vst2.8   {" #c1 "[0], " #c2 "[0]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[1], " #c2 "[1]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[2], " #c2 "[2]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[3], " #c2 "[3]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[4], " #c2 "[4]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[5], " #c2 "[5]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[6], " #c2 "[6]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[7], " #c2 "[7]}," #p "," #stride " \n"
+
 #define QRegs "q0", "q1", "q2", "q3",                                          \
               "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
 
@@ -592,7 +601,7 @@ static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
   DO_SIMPLE_FILTER(p0, q0, q9)                 /* apply filter */              \
   FLIP_SIGN_BIT2(p0, q0, q10)
 
-static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16_NEON(uint8_t* p, int stride, int thresh) {
   __asm__ volatile (
     "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
 
@@ -613,7 +622,7 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
   );
 }
 
-static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
   __asm__ volatile (
     "sub        r4, %[p], #2                   \n"  // base1 = p - 2
     "lsl        r6, %[stride], #1              \n"  // r6 = 2 * stride
@@ -639,30 +648,33 @@ static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
   );
 }
 
+#undef LOAD8x4
+#undef STORE8x2
+
 #endif    // WEBP_USE_INTRINSICS
 
-static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16i_NEON(uint8_t* p, int stride, int thresh) {
   uint32_t k;
   for (k = 3; k != 0; --k) {
     p += 4 * stride;
-    SimpleVFilter16(p, stride, thresh);
+    SimpleVFilter16_NEON(p, stride, thresh);
   }
 }
 
-static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16i_NEON(uint8_t* p, int stride, int thresh) {
   uint32_t k;
   for (k = 3; k != 0; --k) {
     p += 4;
-    SimpleHFilter16(p, stride, thresh);
+    SimpleHFilter16_NEON(p, stride, thresh);
   }
 }
 
 //------------------------------------------------------------------------------
 // Complex In-loop filtering (Paragraph 15.3)
 
-static uint8x16_t NeedsHev(const uint8x16_t p1, const uint8x16_t p0,
-                           const uint8x16_t q0, const uint8x16_t q1,
-                           int hev_thresh) {
+static uint8x16_t NeedsHev_NEON(const uint8x16_t p1, const uint8x16_t p0,
+                                const uint8x16_t q0, const uint8x16_t q1,
+                                int hev_thresh) {
   const uint8x16_t hev_thresh_v = vdupq_n_u8((uint8_t)hev_thresh);
   const uint8x16_t a_p1_p0 = vabdq_u8(p1, p0);  // abs(p1 - p0)
   const uint8x16_t a_q1_q0 = vabdq_u8(q1, q0);  // abs(q1 - q0)
@@ -671,11 +683,11 @@ static uint8x16_t NeedsHev(const uint8x16_t p1, const uint8x16_t p0,
   return mask;
 }
 
-static uint8x16_t NeedsFilter2(const uint8x16_t p3, const uint8x16_t p2,
-                               const uint8x16_t p1, const uint8x16_t p0,
-                               const uint8x16_t q0, const uint8x16_t q1,
-                               const uint8x16_t q2, const uint8x16_t q3,
-                               int ithresh, int thresh) {
+static uint8x16_t NeedsFilter2_NEON(const uint8x16_t p3, const uint8x16_t p2,
+                                    const uint8x16_t p1, const uint8x16_t p0,
+                                    const uint8x16_t q0, const uint8x16_t q1,
+                                    const uint8x16_t q2, const uint8x16_t q3,
+                                    int ithresh, int thresh) {
   const uint8x16_t ithresh_v = vdupq_n_u8((uint8_t)ithresh);
   const uint8x16_t a_p3_p2 = vabdq_u8(p3, p2);  // abs(p3 - p2)
   const uint8x16_t a_p2_p1 = vabdq_u8(p2, p1);  // abs(p2 - p1)
@@ -689,14 +701,14 @@ static uint8x16_t NeedsFilter2(const uint8x16_t p3, const uint8x16_t p2,
   const uint8x16_t max12 = vmaxq_u8(max1, max2);
   const uint8x16_t max123 = vmaxq_u8(max12, max3);
   const uint8x16_t mask2 = vcgeq_u8(ithresh_v, max123);
-  const uint8x16_t mask1 = NeedsFilter(p1, p0, q0, q1, thresh);
+  const uint8x16_t mask1 = NeedsFilter_NEON(p1, p0, q0, q1, thresh);
   const uint8x16_t mask = vandq_u8(mask1, mask2);
   return mask;
 }
 
 //  4-points filter
 
-static void ApplyFilter4(
+static void ApplyFilter4_NEON(
     const int8x16_t p1, const int8x16_t p0,
     const int8x16_t q0, const int8x16_t q1,
     const int8x16_t delta0,
@@ -709,47 +721,47 @@ static void ApplyFilter4(
   const int8x16_t a1 = vshrq_n_s8(delta1, 3);
   const int8x16_t a2 = vshrq_n_s8(delta2, 3);
   const int8x16_t a3 = vrshrq_n_s8(a1, 1);   // a3 = (a1 + 1) >> 1
-  *op0 = FlipSignBack(vqaddq_s8(p0, a2));  // clip(p0 + a2)
-  *oq0 = FlipSignBack(vqsubq_s8(q0, a1));  // clip(q0 - a1)
-  *op1 = FlipSignBack(vqaddq_s8(p1, a3));  // clip(p1 + a3)
-  *oq1 = FlipSignBack(vqsubq_s8(q1, a3));  // clip(q1 - a3)
+  *op0 = FlipSignBack_NEON(vqaddq_s8(p0, a2));  // clip(p0 + a2)
+  *oq0 = FlipSignBack_NEON(vqsubq_s8(q0, a1));  // clip(q0 - a1)
+  *op1 = FlipSignBack_NEON(vqaddq_s8(p1, a3));  // clip(p1 + a3)
+  *oq1 = FlipSignBack_NEON(vqsubq_s8(q1, a3));  // clip(q1 - a3)
 }
 
-static void DoFilter4(
+static void DoFilter4_NEON(
     const uint8x16_t p1, const uint8x16_t p0,
     const uint8x16_t q0, const uint8x16_t q1,
     const uint8x16_t mask, const uint8x16_t hev_mask,
     uint8x16_t* const op1, uint8x16_t* const op0,
     uint8x16_t* const oq0, uint8x16_t* const oq1) {
   // This is a fused version of DoFilter2() calling ApplyFilter2 directly
-  const int8x16_t p1s = FlipSign(p1);
-  int8x16_t p0s = FlipSign(p0);
-  int8x16_t q0s = FlipSign(q0);
-  const int8x16_t q1s = FlipSign(q1);
+  const int8x16_t p1s = FlipSign_NEON(p1);
+  int8x16_t p0s = FlipSign_NEON(p0);
+  int8x16_t q0s = FlipSign_NEON(q0);
+  const int8x16_t q1s = FlipSign_NEON(q1);
   const uint8x16_t simple_lf_mask = vandq_u8(mask, hev_mask);
 
   // do_filter2 part (simple loopfilter on pixels with hev)
   {
-    const int8x16_t delta = GetBaseDelta(p1s, p0s, q0s, q1s);
+    const int8x16_t delta = GetBaseDelta_NEON(p1s, p0s, q0s, q1s);
     const int8x16_t simple_lf_delta =
         vandq_s8(delta, vreinterpretq_s8_u8(simple_lf_mask));
-    ApplyFilter2NoFlip(p0s, q0s, simple_lf_delta, &p0s, &q0s);
+    ApplyFilter2NoFlip_NEON(p0s, q0s, simple_lf_delta, &p0s, &q0s);
   }
 
   // do_filter4 part (complex loopfilter on pixels without hev)
   {
-    const int8x16_t delta0 = GetBaseDelta0(p0s, q0s);
+    const int8x16_t delta0 = GetBaseDelta0_NEON(p0s, q0s);
     // we use: (mask & hev_mask) ^ mask = mask & !hev_mask
     const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
     const int8x16_t complex_lf_delta =
         vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
-    ApplyFilter4(p1s, p0s, q0s, q1s, complex_lf_delta, op1, op0, oq0, oq1);
+    ApplyFilter4_NEON(p1s, p0s, q0s, q1s, complex_lf_delta, op1, op0, oq0, oq1);
   }
 }
 
 //  6-points filter
 
-static void ApplyFilter6(
+static void ApplyFilter6_NEON(
     const int8x16_t p2, const int8x16_t p1, const int8x16_t p0,
     const int8x16_t q0, const int8x16_t q1, const int8x16_t q2,
     const int8x16_t delta,
@@ -778,35 +790,35 @@ static void ApplyFilter6(
   const int8x16_t a2 = vcombine_s8(a2_lo, a2_hi);
   const int8x16_t a3 = vcombine_s8(a3_lo, a3_hi);
 
-  *op0 = FlipSignBack(vqaddq_s8(p0, a1));  // clip(p0 + a1)
-  *oq0 = FlipSignBack(vqsubq_s8(q0, a1));  // clip(q0 - q1)
-  *oq1 = FlipSignBack(vqsubq_s8(q1, a2));  // clip(q1 - a2)
-  *op1 = FlipSignBack(vqaddq_s8(p1, a2));  // clip(p1 + a2)
-  *oq2 = FlipSignBack(vqsubq_s8(q2, a3));  // clip(q2 - a3)
-  *op2 = FlipSignBack(vqaddq_s8(p2, a3));  // clip(p2 + a3)
+  *op0 = FlipSignBack_NEON(vqaddq_s8(p0, a1));  // clip(p0 + a1)
+  *oq0 = FlipSignBack_NEON(vqsubq_s8(q0, a1));  // clip(q0 - a1)
+  *oq1 = FlipSignBack_NEON(vqsubq_s8(q1, a2));  // clip(q1 - a2)
+  *op1 = FlipSignBack_NEON(vqaddq_s8(p1, a2));  // clip(p1 + a2)
+  *oq2 = FlipSignBack_NEON(vqsubq_s8(q2, a3));  // clip(q2 - a3)
+  *op2 = FlipSignBack_NEON(vqaddq_s8(p2, a3));  // clip(p2 + a3)
 }
 
-static void DoFilter6(
+static void DoFilter6_NEON(
     const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
     const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
     const uint8x16_t mask, const uint8x16_t hev_mask,
     uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
     uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
   // This is a fused version of DoFilter2() calling ApplyFilter2 directly
-  const int8x16_t p2s = FlipSign(p2);
-  const int8x16_t p1s = FlipSign(p1);
-  int8x16_t p0s = FlipSign(p0);
-  int8x16_t q0s = FlipSign(q0);
-  const int8x16_t q1s = FlipSign(q1);
-  const int8x16_t q2s = FlipSign(q2);
+  const int8x16_t p2s = FlipSign_NEON(p2);
+  const int8x16_t p1s = FlipSign_NEON(p1);
+  int8x16_t p0s = FlipSign_NEON(p0);
+  int8x16_t q0s = FlipSign_NEON(q0);
+  const int8x16_t q1s = FlipSign_NEON(q1);
+  const int8x16_t q2s = FlipSign_NEON(q2);
   const uint8x16_t simple_lf_mask = vandq_u8(mask, hev_mask);
-  const int8x16_t delta0 = GetBaseDelta(p1s, p0s, q0s, q1s);
+  const int8x16_t delta0 = GetBaseDelta_NEON(p1s, p0s, q0s, q1s);
 
   // do_filter2 part (simple loopfilter on pixels with hev)
   {
     const int8x16_t simple_lf_delta =
         vandq_s8(delta0, vreinterpretq_s8_u8(simple_lf_mask));
-    ApplyFilter2NoFlip(p0s, q0s, simple_lf_delta, &p0s, &q0s);
+    ApplyFilter2NoFlip_NEON(p0s, q0s, simple_lf_delta, &p0s, &q0s);
   }
 
   // do_filter6 part (complex loopfilter on pixels without hev)
@@ -815,65 +827,65 @@ static void DoFilter6(
     const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
     const int8x16_t complex_lf_delta =
         vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
-    ApplyFilter6(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta,
-                 op2, op1, op0, oq0, oq1, oq2);
+    ApplyFilter6_NEON(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta,
+                      op2, op1, op0, oq0, oq1, oq2);
   }
 }
 
 // on macroblock edges
 
-static void VFilter16(uint8_t* p, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void VFilter16_NEON(uint8_t* p, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
   uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
-  Load16x8(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  Load16x8_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
   {
-    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
-                                         ithresh, thresh);
-    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+                                              ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
     uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
-              &op2, &op1, &op0, &oq0, &oq1, &oq2);
-    Store16x2(op2, op1, p - 2 * stride, stride);
-    Store16x2(op0, oq0, p + 0 * stride, stride);
-    Store16x2(oq1, oq2, p + 2 * stride, stride);
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store16x2_NEON(op2, op1, p - 2 * stride, stride);
+    Store16x2_NEON(op0, oq0, p + 0 * stride, stride);
+    Store16x2_NEON(oq1, oq2, p + 2 * stride, stride);
   }
 }
 
-static void HFilter16(uint8_t* p, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void HFilter16_NEON(uint8_t* p, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
   uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
-  Load8x16(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  Load8x16_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
   {
-    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
-                                         ithresh, thresh);
-    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+                                              ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
     uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
-              &op2, &op1, &op0, &oq0, &oq1, &oq2);
-    Store2x16(op2, op1, p - 2, stride);
-    Store2x16(op0, oq0, p + 0, stride);
-    Store2x16(oq1, oq2, p + 2, stride);
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store2x16_NEON(op2, op1, p - 2, stride);
+    Store2x16_NEON(op0, oq0, p + 0, stride);
+    Store2x16_NEON(oq1, oq2, p + 2, stride);
   }
 }
 
 // on three inner edges
-static void VFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i_NEON(uint8_t* p, int stride,
+                            int thresh, int ithresh, int hev_thresh) {
   uint32_t k;
   uint8x16_t p3, p2, p1, p0;
-  Load16x4(p + 2  * stride, stride, &p3, &p2, &p1, &p0);
+  Load16x4_NEON(p + 2  * stride, stride, &p3, &p2, &p1, &p0);
   for (k = 3; k != 0; --k) {
     uint8x16_t q0, q1, q2, q3;
     p += 4 * stride;
-    Load16x4(p + 2  * stride, stride, &q0, &q1, &q2, &q3);
+    Load16x4_NEON(p + 2  * stride, stride, &q0, &q1, &q2, &q3);
     {
       const uint8x16_t mask =
-          NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
-      const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+          NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+      const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
       // p3 and p2 are not just temporary variables here: they will be
       // re-used for next span. And q2/q3 will become p1/p0 accordingly.
-      DoFilter4(p1, p0, q0, q1, mask, hev_mask, &p1, &p0, &p3, &p2);
-      Store16x4(p1, p0, p3, p2, p, stride);
+      DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &p1, &p0, &p3, &p2);
+      Store16x4_NEON(p1, p0, p3, p2, p, stride);
       p1 = q2;
       p0 = q3;
     }
@@ -881,21 +893,21 @@ static void VFilter16i(uint8_t* p, int stride,
 }
 
 #if !defined(WORK_AROUND_GCC)
-static void HFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
+static void HFilter16i_NEON(uint8_t* p, int stride,
+                            int thresh, int ithresh, int hev_thresh) {
   uint32_t k;
   uint8x16_t p3, p2, p1, p0;
-  Load4x16(p + 2, stride, &p3, &p2, &p1, &p0);
+  Load4x16_NEON(p + 2, stride, &p3, &p2, &p1, &p0);
   for (k = 3; k != 0; --k) {
     uint8x16_t q0, q1, q2, q3;
     p += 4;
-    Load4x16(p + 2, stride, &q0, &q1, &q2, &q3);
+    Load4x16_NEON(p + 2, stride, &q0, &q1, &q2, &q3);
     {
       const uint8x16_t mask =
-          NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
-      const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
-      DoFilter4(p1, p0, q0, q1, mask, hev_mask, &p1, &p0, &p3, &p2);
-      Store4x16(p1, p0, p3, p2, p, stride);
+          NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+      const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
+      DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &p1, &p0, &p3, &p2);
+      Store4x16_NEON(p1, p0, p3, p2, p, stride);
       p1 = q2;
       p0 = q3;
     }
@@ -904,67 +916,67 @@ static void HFilter16i(uint8_t* p, int stride,
 #endif  // !WORK_AROUND_GCC
 
 // 8-pixels wide variant, for chroma filtering
-static void VFilter8(uint8_t* u, uint8_t* v, int stride,
-                     int thresh, int ithresh, int hev_thresh) {
+static void VFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
+                          int thresh, int ithresh, int hev_thresh) {
   uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
-  Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
   {
-    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
-                                         ithresh, thresh);
-    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+                                              ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
     uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
-              &op2, &op1, &op0, &oq0, &oq1, &oq2);
-    Store8x2x2(op2, op1, u - 2 * stride, v - 2 * stride, stride);
-    Store8x2x2(op0, oq0, u + 0 * stride, v + 0 * stride, stride);
-    Store8x2x2(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store8x2x2_NEON(op2, op1, u - 2 * stride, v - 2 * stride, stride);
+    Store8x2x2_NEON(op0, oq0, u + 0 * stride, v + 0 * stride, stride);
+    Store8x2x2_NEON(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
   }
 }
-static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void VFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
   uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
   u += 4 * stride;
   v += 4 * stride;
-  Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
   {
-    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
-                                         ithresh, thresh);
-    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+                                              ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
     uint8x16_t op1, op0, oq0, oq1;
-    DoFilter4(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
-    Store8x4x2(op1, op0, oq0, oq1, u, v, stride);
+    DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
+    Store8x4x2_NEON(op1, op0, oq0, oq1, u, v, stride);
   }
 }
 
 #if !defined(WORK_AROUND_GCC)
-static void HFilter8(uint8_t* u, uint8_t* v, int stride,
-                     int thresh, int ithresh, int hev_thresh) {
+static void HFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
+                          int thresh, int ithresh, int hev_thresh) {
   uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
-  Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
   {
-    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
-                                         ithresh, thresh);
-    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+                                              ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
     uint8x16_t op2, op1, op0, oq0, oq1, oq2;
-    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
-              &op2, &op1, &op0, &oq0, &oq1, &oq2);
-    Store6x8x2(op2, op1, op0, oq0, oq1, oq2, u, v, stride);
+    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+                   &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store6x8x2_NEON(op2, op1, op0, oq0, oq1, oq2, u, v, stride);
   }
 }
 
-static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void HFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
   uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
   u += 4;
   v += 4;
-  Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
   {
-    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
-                                         ithresh, thresh);
-    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
+                                              ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
     uint8x16_t op1, op0, oq0, oq1;
-    DoFilter4(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
-    Store4x8x2(op1, op0, oq0, oq1, u, v, stride);
+    DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
+    Store4x8x2_NEON(op1, op0, oq0, oq1, u, v, stride);
   }
 }
 #endif  // !WORK_AROUND_GCC
@@ -992,8 +1004,9 @@ static const int16_t kC1 = 20091;
 static const int16_t kC2 = 17734;  // half of kC2, actually. See comment above.
 
 #if defined(WEBP_USE_INTRINSICS)
-static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
-                                     int16x8x2_t* const out) {
+static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
+                                          const int16x8_t in1,
+                                          int16x8x2_t* const out) {
   // a0 a1 a2 a3 | b0 b1 b2 b3   => a0 b0 c0 d0 | a1 b1 c1 d1
   // c0 c1 c2 c3 | d0 d1 d2 d3      a2 b2 c2 d2 | a3 b3 c3 d3
   const int16x8x2_t tmp0 = vzipq_s16(in0, in1);   // a0 c0 a1 c1 a2 c2 ...
@@ -1001,7 +1014,7 @@ static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
   *out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
 }
 
-static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
+static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
   // {rows} = in0 | in4
   //          in8 | in12
   // B1 = in4 | in12
@@ -1024,20 +1037,20 @@ static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
   const int16x8_t E0 = vqaddq_s16(D0, D1);      // a+d | b+c
   const int16x8_t E_tmp = vqsubq_s16(D0, D1);   // a-d | b-c
   const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
-  Transpose8x2(E0, E1, rows);
+  Transpose8x2_NEON(E0, E1, rows);
 }
 
-static void TransformOne(const int16_t* in, uint8_t* dst) {
+static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
   int16x8x2_t rows;
   INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
-  TransformPass(&rows);
-  TransformPass(&rows);
-  Add4x4(rows.val[0], rows.val[1], dst);
+  TransformPass_NEON(&rows);
+  TransformPass_NEON(&rows);
+  Add4x4_NEON(rows.val[0], rows.val[1], dst);
 }
 
 #else
 
-static void TransformOne(const int16_t* in, uint8_t* dst) {
+static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
   const int kBPS = BPS;
   // kC1, kC2. Padded because vld1.16 loads 8 bytes
   const int16_t constants[4] = { kC1, kC2, 0, 0 };
@@ -1170,16 +1183,16 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
 
 #endif    // WEBP_USE_INTRINSICS
 
-static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
-  TransformOne(in, dst);
+static void TransformTwo_NEON(const int16_t* in, uint8_t* dst, int do_two) {
+  TransformOne_NEON(in, dst);
   if (do_two) {
-    TransformOne(in + 16, dst + 4);
+    TransformOne_NEON(in + 16, dst + 4);
   }
 }
 
-static void TransformDC(const int16_t* in, uint8_t* dst) {
+static void TransformDC_NEON(const int16_t* in, uint8_t* dst) {
   const int16x8_t DC = vdupq_n_s16(in[0]);
-  Add4x4(DC, DC, dst);
+  Add4x4_NEON(DC, DC, dst);
 }
 
 //------------------------------------------------------------------------------
@@ -1191,7 +1204,7 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
   *dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
 } while (0)
 
-static void TransformWHT(const int16_t* in, int16_t* out) {
+static void TransformWHT_NEON(const int16_t* in, int16_t* out) {
   int32x4x4_t tmp;
 
   {
@@ -1209,7 +1222,7 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
     tmp.val[2] = vsubq_s32(a0, a1);
     tmp.val[3] = vsubq_s32(a3, a2);
     // Arrange the temporary results column-wise.
-    tmp = Transpose4x4(tmp);
+    tmp = Transpose4x4_NEON(tmp);
   }
 
   {
@@ -1243,7 +1256,7 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
 //------------------------------------------------------------------------------
 
 #define MUL(a, b) (((a) * (b)) >> 16)
-static void TransformAC3(const int16_t* in, uint8_t* dst) {
+static void TransformAC3_NEON(const int16_t* in, uint8_t* dst) {
   static const int kC1_full = 20091 + (1 << 16);
   static const int kC2_full = 35468;
   const int16x4_t A = vld1_dup_s16(in);
@@ -1259,14 +1272,14 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
   const int16x4_t B = vqadd_s16(A, CD);
   const int16x8_t m0_m1 = vcombine_s16(vqadd_s16(B, d4), vqadd_s16(B, c4));
   const int16x8_t m2_m3 = vcombine_s16(vqsub_s16(B, c4), vqsub_s16(B, d4));
-  Add4x4(m0_m1, m2_m3, dst);
+  Add4x4_NEON(m0_m1, m2_m3, dst);
 }
 #undef MUL
 
 //------------------------------------------------------------------------------
 // 4x4
 
-static void DC4(uint8_t* dst) {    // DC
+static void DC4_NEON(uint8_t* dst) {    // DC
   const uint8x8_t A = vld1_u8(dst - BPS);  // top row
   const uint16x4_t p0 = vpaddl_u8(A);  // cascading summation of the top
   const uint16x4_t p1 = vpadd_u16(p0, p0);
@@ -1287,17 +1300,17 @@ static void DC4(uint8_t* dst) {    // DC
 }
 
 // TrueMotion (4x4 + 8x8)
-static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
+static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, int size) {
   const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1);  // top-left pixel 'A[-1]'
   const uint8x8_t T = vld1_u8(dst - BPS);  // top row 'A[0..3]'
   const int16x8_t d = vreinterpretq_s16_u16(vsubl_u8(T, TL));  // A[c] - A[-1]
   int y;
   for (y = 0; y < size; y += 4) {
     // left edge
-    const int16x8_t L0 = ConvertU8ToS16(vld1_dup_u8(dst + 0 * BPS - 1));
-    const int16x8_t L1 = ConvertU8ToS16(vld1_dup_u8(dst + 1 * BPS - 1));
-    const int16x8_t L2 = ConvertU8ToS16(vld1_dup_u8(dst + 2 * BPS - 1));
-    const int16x8_t L3 = ConvertU8ToS16(vld1_dup_u8(dst + 3 * BPS - 1));
+    const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1));
+    const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1));
+    const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1));
+    const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1));
     const int16x8_t r0 = vaddq_s16(L0, d);  // L[r] + A[c] - A[-1]
     const int16x8_t r1 = vaddq_s16(L1, d);
     const int16x8_t r2 = vaddq_s16(L2, d);
@@ -1322,9 +1335,9 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
   }
 }
 
-static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
+static void TM4_NEON(uint8_t* dst) { TrueMotion_NEON(dst, 4); }
 
-static void VE4(uint8_t* dst) {    // vertical
+static void VE4_NEON(uint8_t* dst) {    // vertical
   // NB: avoid vld1_u64 here as an alignment hint may be added -> SIGBUS.
   const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(dst - BPS - 1));  // top row
   const uint64x1_t A1 = vshr_n_u64(A0, 8);
@@ -1340,7 +1353,7 @@ static void VE4(uint8_t* dst) {    // vertical
   }
 }
 
-static void RD4(uint8_t* dst) {   // Down-right
+static void RD4_NEON(uint8_t* dst) {   // Down-right
   const uint8x8_t XABCD_u8 = vld1_u8(dst - BPS - 1);
   const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
   const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
@@ -1368,7 +1381,7 @@ static void RD4(uint8_t* dst) {   // Down-right
   vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0);
 }
 
-static void LD4(uint8_t* dst) {    // Down-left
+static void LD4_NEON(uint8_t* dst) {    // Down-left
   // Note using the same shift trick as VE4() is slower here.
   const uint8x8_t ABCDEFGH = vld1_u8(dst - BPS + 0);
   const uint8x8_t BCDEFGH0 = vld1_u8(dst - BPS + 1);
@@ -1390,7 +1403,7 @@ static void LD4(uint8_t* dst) {    // Down-left
 //------------------------------------------------------------------------------
 // Chroma
 
-static void VE8uv(uint8_t* dst) {    // vertical
+static void VE8uv_NEON(uint8_t* dst) {    // vertical
   const uint8x8_t top = vld1_u8(dst - BPS);
   int j;
   for (j = 0; j < 8; ++j) {
@@ -1398,7 +1411,7 @@ static void VE8uv(uint8_t* dst) {    // vertical
   }
 }
 
-static void HE8uv(uint8_t* dst) {    // horizontal
+static void HE8uv_NEON(uint8_t* dst) {    // horizontal
   int j;
   for (j = 0; j < 8; ++j) {
     const uint8x8_t left = vld1_dup_u8(dst - 1);
@@ -1407,7 +1420,7 @@ static void HE8uv(uint8_t* dst) {    // horizontal
   }
 }
 
-static WEBP_INLINE void DC8(uint8_t* dst, int do_top, int do_left) {
+static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {
   uint16x8_t sum_top;
   uint16x8_t sum_left;
   uint8x8_t dc0;
@@ -1458,17 +1471,17 @@ static WEBP_INLINE void DC8(uint8_t* dst, int do_top, int do_left) {
   }
 }
 
-static void DC8uv(uint8_t* dst) { DC8(dst, 1, 1); }
-static void DC8uvNoTop(uint8_t* dst) { DC8(dst, 0, 1); }
-static void DC8uvNoLeft(uint8_t* dst) { DC8(dst, 1, 0); }
-static void DC8uvNoTopLeft(uint8_t* dst) { DC8(dst, 0, 0); }
+static void DC8uv_NEON(uint8_t* dst) { DC8_NEON(dst, 1, 1); }
+static void DC8uvNoTop_NEON(uint8_t* dst) { DC8_NEON(dst, 0, 1); }
+static void DC8uvNoLeft_NEON(uint8_t* dst) { DC8_NEON(dst, 1, 0); }
+static void DC8uvNoTopLeft_NEON(uint8_t* dst) { DC8_NEON(dst, 0, 0); }
 
-static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
+static void TM8uv_NEON(uint8_t* dst) { TrueMotion_NEON(dst, 8); }
 
 //------------------------------------------------------------------------------
 // 16x16
 
-static void VE16(uint8_t* dst) {     // vertical
+static void VE16_NEON(uint8_t* dst) {     // vertical
   const uint8x16_t top = vld1q_u8(dst - BPS);
   int j;
   for (j = 0; j < 16; ++j) {
@@ -1476,7 +1489,7 @@ static void VE16(uint8_t* dst) {     // vertical
   }
 }
 
-static void HE16(uint8_t* dst) {     // horizontal
+static void HE16_NEON(uint8_t* dst) {     // horizontal
   int j;
   for (j = 0; j < 16; ++j) {
     const uint8x16_t left = vld1q_dup_u8(dst - 1);
@@ -1485,7 +1498,7 @@ static void HE16(uint8_t* dst) {     // horizontal
   }
 }
 
-static WEBP_INLINE void DC16(uint8_t* dst, int do_top, int do_left) {
+static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) {
   uint16x8_t sum_top;
   uint16x8_t sum_left;
   uint8x8_t dc0;
@@ -1542,12 +1555,12 @@ static WEBP_INLINE void DC16(uint8_t* dst, int do_top, int do_left) {
   }
 }
 
-static void DC16TopLeft(uint8_t* dst) { DC16(dst, 1, 1); }
-static void DC16NoTop(uint8_t* dst) { DC16(dst, 0, 1); }
-static void DC16NoLeft(uint8_t* dst) { DC16(dst, 1, 0); }
-static void DC16NoTopLeft(uint8_t* dst) { DC16(dst, 0, 0); }
+static void DC16TopLeft_NEON(uint8_t* dst) { DC16_NEON(dst, 1, 1); }
+static void DC16NoTop_NEON(uint8_t* dst) { DC16_NEON(dst, 0, 1); }
+static void DC16NoLeft_NEON(uint8_t* dst) { DC16_NEON(dst, 1, 0); }
+static void DC16NoTopLeft_NEON(uint8_t* dst) { DC16_NEON(dst, 0, 0); }
 
-static void TM16(uint8_t* dst) {
+static void TM16_NEON(uint8_t* dst) {
   const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1);  // top-left pixel 'A[-1]'
   const uint8x16_t T = vld1q_u8(dst - BPS);  // top row 'A[0..15]'
   // A[c] - A[-1]
@@ -1556,10 +1569,10 @@ static void TM16(uint8_t* dst) {
   int y;
   for (y = 0; y < 16; y += 4) {
     // left edge
-    const int16x8_t L0 = ConvertU8ToS16(vld1_dup_u8(dst + 0 * BPS - 1));
-    const int16x8_t L1 = ConvertU8ToS16(vld1_dup_u8(dst + 1 * BPS - 1));
-    const int16x8_t L2 = ConvertU8ToS16(vld1_dup_u8(dst + 2 * BPS - 1));
-    const int16x8_t L3 = ConvertU8ToS16(vld1_dup_u8(dst + 3 * BPS - 1));
+    const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1));
+    const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1));
+    const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1));
+    const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1));
     const int16x8_t r0_lo = vaddq_s16(L0, d_lo);  // L[r] + A[c] - A[-1]
     const int16x8_t r1_lo = vaddq_s16(L1, d_lo);
     const int16x8_t r2_lo = vaddq_s16(L2, d_lo);
@@ -1587,49 +1600,49 @@ static void TM16(uint8_t* dst) {
 extern void VP8DspInitNEON(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitNEON(void) {
-  VP8Transform = TransformTwo;
-  VP8TransformAC3 = TransformAC3;
-  VP8TransformDC = TransformDC;
-  VP8TransformWHT = TransformWHT;
-
-  VP8VFilter16 = VFilter16;
-  VP8VFilter16i = VFilter16i;
-  VP8HFilter16 = HFilter16;
+  VP8Transform = TransformTwo_NEON;
+  VP8TransformAC3 = TransformAC3_NEON;
+  VP8TransformDC = TransformDC_NEON;
+  VP8TransformWHT = TransformWHT_NEON;
+
+  VP8VFilter16 = VFilter16_NEON;
+  VP8VFilter16i = VFilter16i_NEON;
+  VP8HFilter16 = HFilter16_NEON;
 #if !defined(WORK_AROUND_GCC)
-  VP8HFilter16i = HFilter16i;
+  VP8HFilter16i = HFilter16i_NEON;
 #endif
-  VP8VFilter8 = VFilter8;
-  VP8VFilter8i = VFilter8i;
+  VP8VFilter8 = VFilter8_NEON;
+  VP8VFilter8i = VFilter8i_NEON;
 #if !defined(WORK_AROUND_GCC)
-  VP8HFilter8 = HFilter8;
-  VP8HFilter8i = HFilter8i;
+  VP8HFilter8 = HFilter8_NEON;
+  VP8HFilter8i = HFilter8i_NEON;
 #endif
-  VP8SimpleVFilter16 = SimpleVFilter16;
-  VP8SimpleHFilter16 = SimpleHFilter16;
-  VP8SimpleVFilter16i = SimpleVFilter16i;
-  VP8SimpleHFilter16i = SimpleHFilter16i;
-
-  VP8PredLuma4[0] = DC4;
-  VP8PredLuma4[1] = TM4;
-  VP8PredLuma4[2] = VE4;
-  VP8PredLuma4[4] = RD4;
-  VP8PredLuma4[6] = LD4;
-
-  VP8PredLuma16[0] = DC16TopLeft;
-  VP8PredLuma16[1] = TM16;
-  VP8PredLuma16[2] = VE16;
-  VP8PredLuma16[3] = HE16;
-  VP8PredLuma16[4] = DC16NoTop;
-  VP8PredLuma16[5] = DC16NoLeft;
-  VP8PredLuma16[6] = DC16NoTopLeft;
-
-  VP8PredChroma8[0] = DC8uv;
-  VP8PredChroma8[1] = TM8uv;
-  VP8PredChroma8[2] = VE8uv;
-  VP8PredChroma8[3] = HE8uv;
-  VP8PredChroma8[4] = DC8uvNoTop;
-  VP8PredChroma8[5] = DC8uvNoLeft;
-  VP8PredChroma8[6] = DC8uvNoTopLeft;
+  VP8SimpleVFilter16 = SimpleVFilter16_NEON;
+  VP8SimpleHFilter16 = SimpleHFilter16_NEON;
+  VP8SimpleVFilter16i = SimpleVFilter16i_NEON;
+  VP8SimpleHFilter16i = SimpleHFilter16i_NEON;
+
+  VP8PredLuma4[0] = DC4_NEON;
+  VP8PredLuma4[1] = TM4_NEON;
+  VP8PredLuma4[2] = VE4_NEON;
+  VP8PredLuma4[4] = RD4_NEON;
+  VP8PredLuma4[6] = LD4_NEON;
+
+  VP8PredLuma16[0] = DC16TopLeft_NEON;
+  VP8PredLuma16[1] = TM16_NEON;
+  VP8PredLuma16[2] = VE16_NEON;
+  VP8PredLuma16[3] = HE16_NEON;
+  VP8PredLuma16[4] = DC16NoTop_NEON;
+  VP8PredLuma16[5] = DC16NoLeft_NEON;
+  VP8PredLuma16[6] = DC16NoTopLeft_NEON;
+
+  VP8PredChroma8[0] = DC8uv_NEON;
+  VP8PredChroma8[1] = TM8uv_NEON;
+  VP8PredChroma8[2] = VE8uv_NEON;
+  VP8PredChroma8[3] = HE8uv_NEON;
+  VP8PredChroma8[4] = DC8uvNoTop_NEON;
+  VP8PredChroma8[5] = DC8uvNoLeft_NEON;
+  VP8PredChroma8[6] = DC8uvNoTopLeft_NEON;
 }
 
 #else  // !WEBP_USE_NEON
diff --git a/thirdparty/libwebp/dsp/dec_sse2.c b/thirdparty/libwebp/src/dsp/dec_sse2.c
index 411fb02768..b3840faf3a 100644
--- a/thirdparty/libwebp/dsp/dec_sse2.c
+++ b/thirdparty/libwebp/src/dsp/dec_sse2.c
@@ -12,23 +12,25 @@
 // Author: somnath@google.com (Somnath Banerjee)
 //         cduvivier@google.com (Christian Duvivier)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE2)
 
 // The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
 // one it seems => disable it by default. Uncomment the following to enable:
-// #define USE_TRANSFORM_AC3
+#if !defined(USE_TRANSFORM_AC3)
+#define USE_TRANSFORM_AC3 0   // ALTERNATE_CODE
+#endif
 
 #include <emmintrin.h>
-#include "./common_sse2.h"
-#include "../dec/vp8i_dec.h"
-#include "../utils/utils.h"
+#include "src/dsp/common_sse2.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
 
-static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
+static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
   // This implementation makes use of 16-bit fixed point versions of two
   // multiply constants:
   //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@@ -193,7 +195,7 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
   }
 }
 
-#if defined(USE_TRANSFORM_AC3)
+#if (USE_TRANSFORM_AC3 == 1)
 #define MUL(a, b) (((a) * (b)) >> 16)
 static void TransformAC3(const int16_t* in, uint8_t* dst) {
   static const int kC1 = 20091 + (1 << 16);
@@ -248,7 +250,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
     _mm_subs_epu8((p), (q)))
 
 // Shift each byte of "x" by 3 bits while preserving by the sign bit.
-static WEBP_INLINE void SignedShift8b(__m128i* const x) {
+static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i lo_0 = _mm_unpacklo_epi8(zero, *x);
   const __m128i hi_0 = _mm_unpackhi_epi8(zero, *x);
@@ -258,8 +260,8 @@ static WEBP_INLINE void SignedShift8b(__m128i* const x) {
 }
 
 #define FLIP_SIGN_BIT2(a, b) {                                                 \
-  a = _mm_xor_si128(a, sign_bit);                                              \
-  b = _mm_xor_si128(b, sign_bit);                                              \
+  (a) = _mm_xor_si128(a, sign_bit);                                            \
+  (b) = _mm_xor_si128(b, sign_bit);                                            \
 }
 
 #define FLIP_SIGN_BIT4(a, b, c, d) {                                           \
@@ -268,11 +270,11 @@ static WEBP_INLINE void SignedShift8b(__m128i* const x) {
 }
 
 // input/output is uint8_t
-static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
-                                  const __m128i* const p0,
-                                  const __m128i* const q0,
-                                  const __m128i* const q1,
-                                  int hev_thresh, __m128i* const not_hev) {
+static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
+                                       const __m128i* const p0,
+                                       const __m128i* const q0,
+                                       const __m128i* const q1,
+                                       int hev_thresh, __m128i* const not_hev) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i t_1 = MM_ABS(*p1, *p0);
   const __m128i t_2 = MM_ABS(*q1, *q0);
@@ -285,11 +287,11 @@ static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
 }
 
 // input pixels are int8_t
-static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
-                                     const __m128i* const p0,
-                                     const __m128i* const q0,
-                                     const __m128i* const q1,
-                                     __m128i* const delta) {
+static WEBP_INLINE void GetBaseDelta_SSE2(const __m128i* const p1,
+                                          const __m128i* const p0,
+                                          const __m128i* const q0,
+                                          const __m128i* const q1,
+                                          __m128i* const delta) {
   // beware of addition order, for saturation!
   const __m128i p1_q1 = _mm_subs_epi8(*p1, *q1);   // p1 - q1
   const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0);   // q0 - p0
@@ -300,15 +302,16 @@ static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
 }
 
 // input and output are int8_t
-static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
-                                       const __m128i* const fl) {
+static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
+                                            __m128i* const q0,
+                                            const __m128i* const fl) {
   const __m128i k3 = _mm_set1_epi8(3);
   const __m128i k4 = _mm_set1_epi8(4);
   __m128i v3 = _mm_adds_epi8(*fl, k3);
   __m128i v4 = _mm_adds_epi8(*fl, k4);
 
-  SignedShift8b(&v4);                  // v4 >> 3
-  SignedShift8b(&v3);                  // v3 >> 3
+  SignedShift8b_SSE2(&v4);             // v4 >> 3
+  SignedShift8b_SSE2(&v3);             // v3 >> 3
   *q0 = _mm_subs_epi8(*q0, v4);        // q0 -= v4
   *p0 = _mm_adds_epi8(*p0, v3);        // p0 += v3
 }
@@ -317,9 +320,9 @@ static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
 // Update operations:
 // q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
 // Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
-static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
-                                      const __m128i* const a0_lo,
-                                      const __m128i* const a0_hi) {
+static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
+                                           const __m128i* const a0_lo,
+                                           const __m128i* const a0_hi) {
   const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
   const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
   const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
@@ -330,11 +333,11 @@ static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
 }
 
 // input pixels are uint8_t
-static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
-                                    const __m128i* const p0,
-                                    const __m128i* const q0,
-                                    const __m128i* const q1,
-                                    int thresh, __m128i* const mask) {
+static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
+                                         const __m128i* const p0,
+                                         const __m128i* const q0,
+                                         const __m128i* const q1,
+                                         int thresh, __m128i* const mask) {
   const __m128i m_thresh = _mm_set1_epi8(thresh);
   const __m128i t1 = MM_ABS(*p1, *q1);        // abs(p1 - q1)
   const __m128i kFE = _mm_set1_epi8(0xFE);
@@ -353,28 +356,29 @@ static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
 // Edge filtering functions
 
 // Applies filter on 2 pixels (p0 and q0)
-static WEBP_INLINE void DoFilter2(__m128i* const p1, __m128i* const p0,
-                                  __m128i* const q0, __m128i* const q1,
-                                  int thresh) {
+static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,
+                                       __m128i* const q0, __m128i* const q1,
+                                       int thresh) {
   __m128i a, mask;
   const __m128i sign_bit = _mm_set1_epi8(0x80);
-  // convert p1/q1 to int8_t (for GetBaseDelta)
+  // convert p1/q1 to int8_t (for GetBaseDelta_SSE2)
   const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
   const __m128i q1s = _mm_xor_si128(*q1, sign_bit);
 
-  NeedsFilter(p1, p0, q0, q1, thresh, &mask);
+  NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &mask);
 
   FLIP_SIGN_BIT2(*p0, *q0);
-  GetBaseDelta(&p1s, p0, q0, &q1s, &a);
+  GetBaseDelta_SSE2(&p1s, p0, q0, &q1s, &a);
   a = _mm_and_si128(a, mask);     // mask filter values we don't care about
-  DoSimpleFilter(p0, q0, &a);
+  DoSimpleFilter_SSE2(p0, q0, &a);
   FLIP_SIGN_BIT2(*p0, *q0);
 }
 
 // Applies filter on 4 pixels (p1, p0, q0 and q1)
-static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
-                                  __m128i* const q0, __m128i* const q1,
-                                  const __m128i* const mask, int hev_thresh) {
+static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
+                                       __m128i* const q0, __m128i* const q1,
+                                       const __m128i* const mask,
+                                       int hev_thresh) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i sign_bit = _mm_set1_epi8(0x80);
   const __m128i k64 = _mm_set1_epi8(64);
@@ -384,7 +388,7 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
   __m128i t1, t2, t3;
 
   // compute hev mask
-  GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
+  GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
 
   // convert to signed values
   FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
@@ -399,8 +403,8 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
 
   t2 = _mm_adds_epi8(t1, k3);        // 3 * (q0 - p0) + hev(p1 - q1) + 3
   t3 = _mm_adds_epi8(t1, k4);        // 3 * (q0 - p0) + hev(p1 - q1) + 4
-  SignedShift8b(&t2);                // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
-  SignedShift8b(&t3);                // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
+  SignedShift8b_SSE2(&t2);           // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
+  SignedShift8b_SSE2(&t3);           // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
   *p0 = _mm_adds_epi8(*p0, t2);      // p0 += t2
   *q0 = _mm_subs_epi8(*q0, t3);      // q0 -= t3
   FLIP_SIGN_BIT2(*p0, *q0);
@@ -417,25 +421,26 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
 }
 
 // Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
-static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
-                                  __m128i* const p0, __m128i* const q0,
-                                  __m128i* const q1, __m128i* const q2,
-                                  const __m128i* const mask, int hev_thresh) {
+static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
+                                       __m128i* const p0, __m128i* const q0,
+                                       __m128i* const q1, __m128i* const q2,
+                                       const __m128i* const mask,
+                                       int hev_thresh) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i sign_bit = _mm_set1_epi8(0x80);
   __m128i a, not_hev;
 
   // compute hev mask
-  GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
+  GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
 
   FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
   FLIP_SIGN_BIT2(*p2, *q2);
-  GetBaseDelta(p1, p0, q0, q1, &a);
+  GetBaseDelta_SSE2(p1, p0, q0, q1, &a);
 
   { // do simple filter on pixels with hev
     const __m128i m = _mm_andnot_si128(not_hev, *mask);
     const __m128i f = _mm_and_si128(a, m);
-    DoSimpleFilter(p0, q0, &f);
+    DoSimpleFilter_SSE2(p0, q0, &f);
   }
 
   { // do strong filter on pixels with not hev
@@ -460,15 +465,15 @@ static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
     const __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo);  // Filter * 27 + 63
     const __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi);  // Filter * 27 + 63
 
-    Update2Pixels(p2, q2, &a2_lo, &a2_hi);
-    Update2Pixels(p1, q1, &a1_lo, &a1_hi);
-    Update2Pixels(p0, q0, &a0_lo, &a0_hi);
+    Update2Pixels_SSE2(p2, q2, &a2_lo, &a2_hi);
+    Update2Pixels_SSE2(p1, q1, &a1_lo, &a1_hi);
+    Update2Pixels_SSE2(p0, q0, &a0_lo, &a0_hi);
   }
 }
 
 // reads 8 rows across a vertical edge.
-static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
-                                __m128i* const p, __m128i* const q) {
+static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
+                                     __m128i* const p, __m128i* const q) {
   // A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
   // A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
   const __m128i A0 = _mm_set_epi32(
@@ -494,11 +499,11 @@ static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
   *q = _mm_unpackhi_epi32(C0, C1);
 }
 
-static WEBP_INLINE void Load16x4(const uint8_t* const r0,
-                                 const uint8_t* const r8,
-                                 int stride,
-                                 __m128i* const p1, __m128i* const p0,
-                                 __m128i* const q0, __m128i* const q1) {
+static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
+                                      const uint8_t* const r8,
+                                      int stride,
+                                      __m128i* const p1, __m128i* const p0,
+                                      __m128i* const q0, __m128i* const q1) {
   // Assume the pixels around the edge (|) are numbered as follows
   //                00 01 | 02 03
   //                10 11 | 12 13
@@ -514,8 +519,8 @@ static WEBP_INLINE void Load16x4(const uint8_t* const r0,
   // q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
   // p0 = f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
   // q1 = f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
-  Load8x4(r0, stride, p1, q0);
-  Load8x4(r8, stride, p0, q1);
+  Load8x4_SSE2(r0, stride, p1, q0);
+  Load8x4_SSE2(r8, stride, p0, q1);
 
   {
     // p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
@@ -531,7 +536,8 @@ static WEBP_INLINE void Load16x4(const uint8_t* const r0,
   }
 }
 
-static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
+static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
+                                      uint8_t* dst, int stride) {
   int i;
   for (i = 0; i < 4; ++i, dst += stride) {
     WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x));
@@ -540,12 +546,12 @@ static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
 }
 
 // Transpose back and store
-static WEBP_INLINE void Store16x4(const __m128i* const p1,
-                                  const __m128i* const p0,
-                                  const __m128i* const q0,
-                                  const __m128i* const q1,
-                                  uint8_t* r0, uint8_t* r8,
-                                  int stride) {
+static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
+                                       const __m128i* const p0,
+                                       const __m128i* const q0,
+                                       const __m128i* const q1,
+                                       uint8_t* r0, uint8_t* r8,
+                                       int stride) {
   __m128i t1, p1_s, p0_s, q0_s, q1_s;
 
   // p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
@@ -572,55 +578,55 @@ static WEBP_INLINE void Store16x4(const __m128i* const p1,
   p1_s = _mm_unpacklo_epi16(t1, q1_s);
   q1_s = _mm_unpackhi_epi16(t1, q1_s);
 
-  Store4x4(&p0_s, r0, stride);
+  Store4x4_SSE2(&p0_s, r0, stride);
   r0 += 4 * stride;
-  Store4x4(&q0_s, r0, stride);
+  Store4x4_SSE2(&q0_s, r0, stride);
 
-  Store4x4(&p1_s, r8, stride);
+  Store4x4_SSE2(&p1_s, r8, stride);
   r8 += 4 * stride;
-  Store4x4(&q1_s, r8, stride);
+  Store4x4_SSE2(&q1_s, r8, stride);
 }
 
 //------------------------------------------------------------------------------
 // Simple In-loop filtering (Paragraph 15.2)
 
-static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16_SSE2(uint8_t* p, int stride, int thresh) {
   // Load
   __m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
   __m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
   __m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
   __m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);
 
-  DoFilter2(&p1, &p0, &q0, &q1, thresh);
+  DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
 
   // Store
   _mm_storeu_si128((__m128i*)&p[-stride], p0);
   _mm_storeu_si128((__m128i*)&p[0], q0);
 }
 
-static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16_SSE2(uint8_t* p, int stride, int thresh) {
   __m128i p1, p0, q0, q1;
 
   p -= 2;  // beginning of p1
 
-  Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
-  DoFilter2(&p1, &p0, &q0, &q1, thresh);
-  Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
+  Load16x4_SSE2(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
+  DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
+  Store16x4_SSE2(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
 }
 
-static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
   int k;
   for (k = 3; k > 0; --k) {
     p += 4 * stride;
-    SimpleVFilter16(p, stride, thresh);
+    SimpleVFilter16_SSE2(p, stride, thresh);
   }
 }
 
-static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
   int k;
   for (k = 3; k > 0; --k) {
     p += 4;
-    SimpleHFilter16(p, stride, thresh);
+    SimpleHFilter16_SSE2(p, stride, thresh);
   }
 }
 
@@ -628,60 +634,60 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
 // Complex In-loop filtering (Paragraph 15.3)
 
 #define MAX_DIFF1(p3, p2, p1, p0, m) do {                                      \
-  m = MM_ABS(p1, p0);                                                          \
-  m = _mm_max_epu8(m, MM_ABS(p3, p2));                                         \
-  m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
+  (m) = MM_ABS(p1, p0);                                                        \
+  (m) = _mm_max_epu8(m, MM_ABS(p3, p2));                                       \
+  (m) = _mm_max_epu8(m, MM_ABS(p2, p1));                                       \
 } while (0)
 
 #define MAX_DIFF2(p3, p2, p1, p0, m) do {                                      \
-  m = _mm_max_epu8(m, MM_ABS(p1, p0));                                         \
-  m = _mm_max_epu8(m, MM_ABS(p3, p2));                                         \
-  m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
+  (m) = _mm_max_epu8(m, MM_ABS(p1, p0));                                       \
+  (m) = _mm_max_epu8(m, MM_ABS(p3, p2));                                       \
+  (m) = _mm_max_epu8(m, MM_ABS(p2, p1));                                       \
 } while (0)
 
 #define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) {                             \
-  e1 = _mm_loadu_si128((__m128i*)&(p)[0 * stride]);                            \
-  e2 = _mm_loadu_si128((__m128i*)&(p)[1 * stride]);                            \
-  e3 = _mm_loadu_si128((__m128i*)&(p)[2 * stride]);                            \
-  e4 = _mm_loadu_si128((__m128i*)&(p)[3 * stride]);                            \
+  (e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]);                        \
+  (e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]);                        \
+  (e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]);                        \
+  (e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]);                        \
 }
 
 #define LOADUV_H_EDGE(p, u, v, stride) do {                                    \
   const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]);                 \
   const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]);                 \
-  p = _mm_unpacklo_epi64(U, V);                                                \
+  (p) = _mm_unpacklo_epi64(U, V);                                              \
 } while (0)
 
 #define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) {                        \
-  LOADUV_H_EDGE(e1, u, v, 0 * stride);                                         \
-  LOADUV_H_EDGE(e2, u, v, 1 * stride);                                         \
-  LOADUV_H_EDGE(e3, u, v, 2 * stride);                                         \
-  LOADUV_H_EDGE(e4, u, v, 3 * stride);                                         \
+  LOADUV_H_EDGE(e1, u, v, 0 * (stride));                                       \
+  LOADUV_H_EDGE(e2, u, v, 1 * (stride));                                       \
+  LOADUV_H_EDGE(e3, u, v, 2 * (stride));                                       \
+  LOADUV_H_EDGE(e4, u, v, 3 * (stride));                                       \
 }
 
 #define STOREUV(p, u, v, stride) {                                             \
-  _mm_storel_epi64((__m128i*)&u[(stride)], p);                                 \
-  p = _mm_srli_si128(p, 8);                                                    \
-  _mm_storel_epi64((__m128i*)&v[(stride)], p);                                 \
+  _mm_storel_epi64((__m128i*)&(u)[(stride)], p);                               \
+  (p) = _mm_srli_si128(p, 8);                                                  \
+  _mm_storel_epi64((__m128i*)&(v)[(stride)], p);                               \
 }
 
-static WEBP_INLINE void ComplexMask(const __m128i* const p1,
-                                    const __m128i* const p0,
-                                    const __m128i* const q0,
-                                    const __m128i* const q1,
-                                    int thresh, int ithresh,
-                                    __m128i* const mask) {
+static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
+                                         const __m128i* const p0,
+                                         const __m128i* const q0,
+                                         const __m128i* const q1,
+                                         int thresh, int ithresh,
+                                         __m128i* const mask) {
   const __m128i it = _mm_set1_epi8(ithresh);
   const __m128i diff = _mm_subs_epu8(*mask, it);
   const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
   __m128i filter_mask;
-  NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
+  NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &filter_mask);
   *mask = _mm_and_si128(thresh_mask, filter_mask);
 }
 
 // on macroblock edges
-static void VFilter16(uint8_t* p, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void VFilter16_SSE2(uint8_t* p, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
   __m128i t1;
   __m128i mask;
   __m128i p2, p1, p0, q0, q1, q2;
@@ -694,8 +700,8 @@ static void VFilter16(uint8_t* p, int stride,
   LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
   MAX_DIFF2(t1, q2, q1, q0, mask);
 
-  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
 
   // Store
   _mm_storeu_si128((__m128i*)&p[-3 * stride], p2);
@@ -706,28 +712,28 @@ static void VFilter16(uint8_t* p, int stride,
   _mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
 }
 
-static void HFilter16(uint8_t* p, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void HFilter16_SSE2(uint8_t* p, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i p3, p2, p1, p0, q0, q1, q2, q3;
 
   uint8_t* const b = p - 4;
-  Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);  // p3, p2, p1, p0
+  Load16x4_SSE2(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);
   MAX_DIFF1(p3, p2, p1, p0, mask);
 
-  Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);  // q0, q1, q2, q3
+  Load16x4_SSE2(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
   MAX_DIFF2(q3, q2, q1, q0, mask);
 
-  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
 
-  Store16x4(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
-  Store16x4(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
+  Store16x4_SSE2(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
+  Store16x4_SSE2(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
 }
 
 // on three inner edges
-static void VFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i_SSE2(uint8_t* p, int stride,
+                            int thresh, int ithresh, int hev_thresh) {
   int k;
   __m128i p3, p2, p1, p0;   // loop invariants
 
@@ -744,8 +750,8 @@ static void VFilter16i(uint8_t* p, int stride,
 
     // p3 and p2 are not just temporary variables here: they will be
     // re-used for next span. And q2/q3 will become p1/p0 accordingly.
-    ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
-    DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
+    ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+    DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
 
     // Store
     _mm_storeu_si128((__m128i*)&b[0 * stride], p1);
@@ -759,12 +765,12 @@ static void VFilter16i(uint8_t* p, int stride,
   }
 }
 
-static void HFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
+static void HFilter16i_SSE2(uint8_t* p, int stride,
+                            int thresh, int ithresh, int hev_thresh) {
   int k;
   __m128i p3, p2, p1, p0;   // loop invariants
 
-  Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0);  // prologue
+  Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0);  // prologue
 
   for (k = 3; k > 0; --k) {
     __m128i mask, tmp1, tmp2;
@@ -773,13 +779,13 @@ static void HFilter16i(uint8_t* p, int stride,
     p += 4;  // beginning of q0 (and next span)
 
     MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
-    Load16x4(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
+    Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
     MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
 
-    ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
-    DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
+    ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+    DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
 
-    Store16x4(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
+    Store16x4_SSE2(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
 
     // rotate samples
     p1 = tmp1;
@@ -788,8 +794,8 @@ static void HFilter16i(uint8_t* p, int stride,
 }
 
 // 8-pixels wide variant, for chroma filtering
-static void VFilter8(uint8_t* u, uint8_t* v, int stride,
-                     int thresh, int ithresh, int hev_thresh) {
+static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
+                          int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i t1, p2, p1, p0, q0, q1, q2;
 
@@ -801,8 +807,8 @@ static void VFilter8(uint8_t* u, uint8_t* v, int stride,
   LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
   MAX_DIFF2(t1, q2, q1, q0, mask);
 
-  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
 
   // Store
   STOREUV(p2, u, v, -3 * stride);
@@ -813,28 +819,28 @@ static void VFilter8(uint8_t* u, uint8_t* v, int stride,
   STOREUV(q2, u, v, 2 * stride);
 }
 
-static void HFilter8(uint8_t* u, uint8_t* v, int stride,
-                     int thresh, int ithresh, int hev_thresh) {
+static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
+                          int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i p3, p2, p1, p0, q0, q1, q2, q3;
 
   uint8_t* const tu = u - 4;
   uint8_t* const tv = v - 4;
-  Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0);  // p3, p2, p1, p0
+  Load16x4_SSE2(tu, tv, stride, &p3, &p2, &p1, &p0);
   MAX_DIFF1(p3, p2, p1, p0, mask);
 
-  Load16x4(u, v, stride, &q0, &q1, &q2, &q3);    // q0, q1, q2, q3
+  Load16x4_SSE2(u, v, stride, &q0, &q1, &q2, &q3);
   MAX_DIFF2(q3, q2, q1, q0, mask);
 
-  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
 
-  Store16x4(&p3, &p2, &p1, &p0, tu, tv, stride);
-  Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
+  Store16x4_SSE2(&p3, &p2, &p1, &p0, tu, tv, stride);
+  Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
 }
 
-static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i t1, t2, p1, p0, q0, q1;
 
@@ -849,8 +855,8 @@ static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
   LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
   MAX_DIFF2(t2, t1, q1, q0, mask);
 
-  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
 
   // Store
   STOREUV(p1, u, v, -2 * stride);
@@ -859,24 +865,24 @@ static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
   STOREUV(q1, u, v, 1 * stride);
 }
 
-static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
+static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i t1, t2, p1, p0, q0, q1;
-  Load16x4(u, v, stride, &t2, &t1, &p1, &p0);   // p3, p2, p1, p0
+  Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0);   // p3, p2, p1, p0
   MAX_DIFF1(t2, t1, p1, p0, mask);
 
   u += 4;  // beginning of q0
   v += 4;
-  Load16x4(u, v, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
+  Load16x4_SSE2(u, v, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
   MAX_DIFF2(t2, t1, q1, q0, mask);
 
-  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
-  DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+  ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
 
   u -= 2;  // beginning of p1
   v -= 2;
-  Store16x4(&p1, &p0, &q0, &q1, u, v, stride);
+  Store16x4_SSE2(&p1, &p0, &q0, &q1, u, v, stride);
 }
 
 //------------------------------------------------------------------------------
@@ -893,7 +899,7 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
 //   where: AC = (a + b + 1) >> 1,   BC = (b + c + 1) >> 1
 //   and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
 
-static void VE4(uint8_t* dst) {    // vertical
+static void VE4_SSE2(uint8_t* dst) {    // vertical
   const __m128i one = _mm_set1_epi8(1);
   const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
   const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -909,7 +915,7 @@ static void VE4(uint8_t* dst) {    // vertical
   }
 }
 
-static void LD4(uint8_t* dst) {   // Down-Left
+static void LD4_SSE2(uint8_t* dst) {   // Down-Left
   const __m128i one = _mm_set1_epi8(1);
   const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
   const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -925,7 +931,7 @@ static void LD4(uint8_t* dst) {   // Down-Left
   WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
 }
 
-static void VR4(uint8_t* dst) {   // Vertical-Right
+static void VR4_SSE2(uint8_t* dst) {   // Vertical-Right
   const __m128i one = _mm_set1_epi8(1);
   const int I = dst[-1 + 0 * BPS];
   const int J = dst[-1 + 1 * BPS];
@@ -950,7 +956,7 @@ static void VR4(uint8_t* dst) {   // Vertical-Right
   DST(0, 3) = AVG3(K, J, I);
 }
 
-static void VL4(uint8_t* dst) {   // Vertical-Left
+static void VL4_SSE2(uint8_t* dst) {   // Vertical-Left
   const __m128i one = _mm_set1_epi8(1);
   const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
   const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
@@ -975,7 +981,7 @@ static void VL4(uint8_t* dst) {   // Vertical-Left
   DST(3, 3) = (extra_out >> 8) & 0xff;
 }
 
-static void RD4(uint8_t* dst) {   // Down-right
+static void RD4_SSE2(uint8_t* dst) {   // Down-right
   const __m128i one = _mm_set1_epi8(1);
   const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
   const __m128i ____XABCD = _mm_slli_si128(XABCD, 4);
@@ -1004,7 +1010,7 @@ static void RD4(uint8_t* dst) {   // Down-right
 //------------------------------------------------------------------------------
 // Luma 16x16
 
-static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
+static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
   const uint8_t* top = dst - BPS;
   const __m128i zero = _mm_setzero_si128();
   int y;
@@ -1041,11 +1047,11 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
   }
 }
 
-static void TM4(uint8_t* dst)   { TrueMotion(dst, 4); }
-static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
-static void TM16(uint8_t* dst)  { TrueMotion(dst, 16); }
+static void TM4_SSE2(uint8_t* dst)   { TrueMotion_SSE2(dst, 4); }
+static void TM8uv_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 8); }
+static void TM16_SSE2(uint8_t* dst)  { TrueMotion_SSE2(dst, 16); }
 
-static void VE16(uint8_t* dst) {
+static void VE16_SSE2(uint8_t* dst) {
   const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
   int j;
   for (j = 0; j < 16; ++j) {
@@ -1053,7 +1059,7 @@ static void VE16(uint8_t* dst) {
   }
 }
 
-static void HE16(uint8_t* dst) {     // horizontal
+static void HE16_SSE2(uint8_t* dst) {     // horizontal
   int j;
   for (j = 16; j > 0; --j) {
     const __m128i values = _mm_set1_epi8(dst[-1]);
@@ -1062,7 +1068,7 @@ static void HE16(uint8_t* dst) {     // horizontal
   }
 }
 
-static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
+static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
   int j;
   const __m128i values = _mm_set1_epi8(v);
   for (j = 0; j < 16; ++j) {
@@ -1070,7 +1076,7 @@ static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
   }
 }
 
-static void DC16(uint8_t* dst) {    // DC
+static void DC16_SSE2(uint8_t* dst) {  // DC
   const __m128i zero = _mm_setzero_si128();
   const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
   const __m128i sad8x2 = _mm_sad_epu8(top, zero);
@@ -1083,37 +1089,37 @@ static void DC16(uint8_t* dst) {    // DC
   }
   {
     const int DC = _mm_cvtsi128_si32(sum) + left + 16;
-    Put16(DC >> 5, dst);
+    Put16_SSE2(DC >> 5, dst);
   }
 }
 
-static void DC16NoTop(uint8_t* dst) {   // DC with top samples not available
+static void DC16NoTop_SSE2(uint8_t* dst) {  // DC with top samples unavailable
   int DC = 8;
   int j;
   for (j = 0; j < 16; ++j) {
     DC += dst[-1 + j * BPS];
   }
-  Put16(DC >> 4, dst);
+  Put16_SSE2(DC >> 4, dst);
 }
 
-static void DC16NoLeft(uint8_t* dst) {  // DC with left samples not available
+static void DC16NoLeft_SSE2(uint8_t* dst) {  // DC with left samples unavailable
   const __m128i zero = _mm_setzero_si128();
   const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
   const __m128i sad8x2 = _mm_sad_epu8(top, zero);
   // sum the two sads: sad8x2[0:1] + sad8x2[8:9]
   const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
   const int DC = _mm_cvtsi128_si32(sum) + 8;
-  Put16(DC >> 4, dst);
+  Put16_SSE2(DC >> 4, dst);
 }
 
-static void DC16NoTopLeft(uint8_t* dst) {  // DC with no top and left samples
-  Put16(0x80, dst);
+static void DC16NoTopLeft_SSE2(uint8_t* dst) {  // DC with no top & left samples
+  Put16_SSE2(0x80, dst);
 }
 
 //------------------------------------------------------------------------------
 // Chroma
 
-static void VE8uv(uint8_t* dst) {    // vertical
+static void VE8uv_SSE2(uint8_t* dst) {    // vertical
   int j;
   const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
   for (j = 0; j < 8; ++j) {
@@ -1121,17 +1127,8 @@ static void VE8uv(uint8_t* dst) {    // vertical
   }
 }
 
-static void HE8uv(uint8_t* dst) {    // horizontal
-  int j;
-  for (j = 0; j < 8; ++j) {
-    const __m128i values = _mm_set1_epi8(dst[-1]);
-    _mm_storel_epi64((__m128i*)dst, values);
-    dst += BPS;
-  }
-}
-
 // helper for chroma-DC predictions
-static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
+static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
   int j;
   const __m128i values = _mm_set1_epi8(v);
   for (j = 0; j < 8; ++j) {
@@ -1139,7 +1136,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
   }
 }
 
-static void DC8uv(uint8_t* dst) {     // DC
+static void DC8uv_SSE2(uint8_t* dst) {     // DC
   const __m128i zero = _mm_setzero_si128();
   const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
   const __m128i sum = _mm_sad_epu8(top, zero);
@@ -1150,29 +1147,29 @@ static void DC8uv(uint8_t* dst) {     // DC
   }
   {
     const int DC = _mm_cvtsi128_si32(sum) + left + 8;
-    Put8x8uv(DC >> 4, dst);
+    Put8x8uv_SSE2(DC >> 4, dst);
   }
 }
 
-static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
+static void DC8uvNoLeft_SSE2(uint8_t* dst) {   // DC with no left samples
   const __m128i zero = _mm_setzero_si128();
   const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
   const __m128i sum = _mm_sad_epu8(top, zero);
   const int DC = _mm_cvtsi128_si32(sum) + 4;
-  Put8x8uv(DC >> 3, dst);
+  Put8x8uv_SSE2(DC >> 3, dst);
 }
 
-static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
+static void DC8uvNoTop_SSE2(uint8_t* dst) {  // DC with no top samples
   int dc0 = 4;
   int i;
   for (i = 0; i < 8; ++i) {
     dc0 += dst[-1 + i * BPS];
   }
-  Put8x8uv(dc0 >> 3, dst);
+  Put8x8uv_SSE2(dc0 >> 3, dst);
 }
 
-static void DC8uvNoTopLeft(uint8_t* dst) {    // DC with nothing
-  Put8x8uv(0x80, dst);
+static void DC8uvNoTopLeft_SSE2(uint8_t* dst) {    // DC with nothing
+  Put8x8uv_SSE2(0x80, dst);
 }
 
 //------------------------------------------------------------------------------
@@ -1181,47 +1178,46 @@ static void DC8uvNoTopLeft(uint8_t* dst) {    // DC with nothing
 extern void VP8DspInitSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
-  VP8Transform = Transform;
-#if defined(USE_TRANSFORM_AC3)
-  VP8TransformAC3 = TransformAC3;
+  VP8Transform = Transform_SSE2;
+#if (USE_TRANSFORM_AC3 == 1)
+  VP8TransformAC3 = TransformAC3_SSE2;
 #endif
 
-  VP8VFilter16 = VFilter16;
-  VP8HFilter16 = HFilter16;
-  VP8VFilter8 = VFilter8;
-  VP8HFilter8 = HFilter8;
-  VP8VFilter16i = VFilter16i;
-  VP8HFilter16i = HFilter16i;
-  VP8VFilter8i = VFilter8i;
-  VP8HFilter8i = HFilter8i;
-
-  VP8SimpleVFilter16 = SimpleVFilter16;
-  VP8SimpleHFilter16 = SimpleHFilter16;
-  VP8SimpleVFilter16i = SimpleVFilter16i;
-  VP8SimpleHFilter16i = SimpleHFilter16i;
-
-  VP8PredLuma4[1] = TM4;
-  VP8PredLuma4[2] = VE4;
-  VP8PredLuma4[4] = RD4;
-  VP8PredLuma4[5] = VR4;
-  VP8PredLuma4[6] = LD4;
-  VP8PredLuma4[7] = VL4;
-
-  VP8PredLuma16[0] = DC16;
-  VP8PredLuma16[1] = TM16;
-  VP8PredLuma16[2] = VE16;
-  VP8PredLuma16[3] = HE16;
-  VP8PredLuma16[4] = DC16NoTop;
-  VP8PredLuma16[5] = DC16NoLeft;
-  VP8PredLuma16[6] = DC16NoTopLeft;
-
-  VP8PredChroma8[0] = DC8uv;
-  VP8PredChroma8[1] = TM8uv;
-  VP8PredChroma8[2] = VE8uv;
-  VP8PredChroma8[3] = HE8uv;
-  VP8PredChroma8[4] = DC8uvNoTop;
-  VP8PredChroma8[5] = DC8uvNoLeft;
-  VP8PredChroma8[6] = DC8uvNoTopLeft;
+  VP8VFilter16 = VFilter16_SSE2;
+  VP8HFilter16 = HFilter16_SSE2;
+  VP8VFilter8 = VFilter8_SSE2;
+  VP8HFilter8 = HFilter8_SSE2;
+  VP8VFilter16i = VFilter16i_SSE2;
+  VP8HFilter16i = HFilter16i_SSE2;
+  VP8VFilter8i = VFilter8i_SSE2;
+  VP8HFilter8i = HFilter8i_SSE2;
+
+  VP8SimpleVFilter16 = SimpleVFilter16_SSE2;
+  VP8SimpleHFilter16 = SimpleHFilter16_SSE2;
+  VP8SimpleVFilter16i = SimpleVFilter16i_SSE2;
+  VP8SimpleHFilter16i = SimpleHFilter16i_SSE2;
+
+  VP8PredLuma4[1] = TM4_SSE2;
+  VP8PredLuma4[2] = VE4_SSE2;
+  VP8PredLuma4[4] = RD4_SSE2;
+  VP8PredLuma4[5] = VR4_SSE2;
+  VP8PredLuma4[6] = LD4_SSE2;
+  VP8PredLuma4[7] = VL4_SSE2;
+
+  VP8PredLuma16[0] = DC16_SSE2;
+  VP8PredLuma16[1] = TM16_SSE2;
+  VP8PredLuma16[2] = VE16_SSE2;
+  VP8PredLuma16[3] = HE16_SSE2;
+  VP8PredLuma16[4] = DC16NoTop_SSE2;
+  VP8PredLuma16[5] = DC16NoLeft_SSE2;
+  VP8PredLuma16[6] = DC16NoTopLeft_SSE2;
+
+  VP8PredChroma8[0] = DC8uv_SSE2;
+  VP8PredChroma8[1] = TM8uv_SSE2;
+  VP8PredChroma8[2] = VE8uv_SSE2;
+  VP8PredChroma8[4] = DC8uvNoTop_SSE2;
+  VP8PredChroma8[5] = DC8uvNoLeft_SSE2;
+  VP8PredChroma8[6] = DC8uvNoTopLeft_SSE2;
 }
 
 #else  // !WEBP_USE_SSE2
diff --git a/thirdparty/libwebp/dsp/dec_sse41.c b/thirdparty/libwebp/src/dsp/dec_sse41.c
index 4e81ec4d80..8f18506d54 100644
--- a/thirdparty/libwebp/dsp/dec_sse41.c
+++ b/thirdparty/libwebp/src/dsp/dec_sse41.c
@@ -11,15 +11,15 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE41)
 
 #include <smmintrin.h>
-#include "../dec/vp8i_dec.h"
-#include "../utils/utils.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/utils/utils.h"
 
-static void HE16(uint8_t* dst) {     // horizontal
+static void HE16_SSE41(uint8_t* dst) {     // horizontal
   int j;
   const __m128i kShuffle3 = _mm_set1_epi8(3);
   for (j = 16; j > 0; --j) {
@@ -36,7 +36,7 @@ static void HE16(uint8_t* dst) {     // horizontal
 extern void VP8DspInitSSE41(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
-  VP8PredLuma16[3] = HE16;
+  VP8PredLuma16[3] = HE16_SSE41;
 }
 
 #else  // !WEBP_USE_SSE41
diff --git a/thirdparty/libwebp/dsp/dsp.h b/thirdparty/libwebp/src/dsp/dsp.h
index 813fed4a35..99eefe092f 100644
--- a/thirdparty/libwebp/dsp/dsp.h
+++ b/thirdparty/libwebp/src/dsp/dsp.h
@@ -15,10 +15,10 @@
 #define WEBP_DSP_DSP_H_
 
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -38,10 +38,22 @@ extern "C" {
 # define LOCAL_GCC_PREREQ(maj, min) 0
 #endif
 
+#if defined(__clang__)
+# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
+# define LOCAL_CLANG_PREREQ(maj, min) \
+    (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
+#else
+# define LOCAL_CLANG_VERSION 0
+# define LOCAL_CLANG_PREREQ(maj, min) 0
+#endif
+
 #ifndef __has_builtin
 # define __has_builtin(x) 0
 #endif
 
+// for now, none of the optimizations below are available in emscripten
+#if !defined(EMSCRIPTEN)
+
 #if defined(_MSC_VER) && _MSC_VER > 1310 && \
     (defined(_M_X64) || defined(_M_IX86))
 #define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
@@ -68,18 +80,20 @@ extern "C" {
 #define WEBP_USE_AVX2
 #endif
 
-#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
-#define WEBP_ANDROID_NEON  // Android targets that might support NEON
-#endif
-
 // The intrinsics currently cause compiler errors with arm-nacl-gcc and the
 // inline assembly would need to be modified for use with Native Client.
-#if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || \
+#if (defined(__ARM_NEON__) || \
      defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \
     !defined(__native_client__)
 #define WEBP_USE_NEON
 #endif
 
+#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
+    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
+#define WEBP_ANDROID_NEON  // Android targets that may have NEON
+#define WEBP_USE_NEON
+#endif
+
 #if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
 #define WEBP_USE_NEON
 #define WEBP_USE_INTRINSICS
@@ -90,7 +104,7 @@ extern "C" {
 #define WEBP_USE_MIPS32
 #if (__mips_isa_rev >= 2)
 #define WEBP_USE_MIPS32_R2
-#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
+#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
 #define WEBP_USE_MIPS_DSP_R2
 #endif
 #endif
@@ -100,6 +114,24 @@ extern "C" {
 #define WEBP_USE_MSA
 #endif
 
+#endif  /* EMSCRIPTEN */
+
+#ifndef WEBP_DSP_OMIT_C_CODE
+#define WEBP_DSP_OMIT_C_CODE 1
+#endif
+
+#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
+#define WEBP_NEON_OMIT_C_CODE 1
+#else
+#define WEBP_NEON_OMIT_C_CODE 0
+#endif
+
+#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
+#define WEBP_NEON_WORK_AROUND_GCC 1
+#else
+#define WEBP_NEON_WORK_AROUND_GCC 0
+#endif
+
 // This macro prevents thread_sanitizer from reporting known concurrent writes.
 #define WEBP_TSAN_IGNORE_FUNCTION
 #if defined(__has_feature)
@@ -129,6 +161,11 @@ extern "C" {
 #endif
 #endif
 
+// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
+#if !defined(WEBP_SWAP_16BIT_CSP)
+#define WEBP_SWAP_16BIT_CSP 0
+#endif
+
 typedef enum {
   kSSE2,
   kSSE3,
@@ -143,7 +180,7 @@ typedef enum {
 } CPUFeature;
 // returns true if the CPU supports the feature.
 typedef int (*VP8CPUInfo)(CPUFeature feature);
-WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo;
+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
 
 //------------------------------------------------------------------------------
 // Init stub generator
@@ -271,6 +308,7 @@ typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
                                         int xo, int yo,  // center position
                                         int W, int H);   // plane dimension
 
+#if !defined(WEBP_REDUCE_SIZE)
 // This version is called with the guarantee that you can load 8 bytes and
 // 8 rows at offset src1 and src2
 typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
@@ -278,10 +316,13 @@ typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
 
 extern VP8SSIMGetFunc VP8SSIMGet;         // unclipped / unchecked
 extern VP8SSIMGetClippedFunc VP8SSIMGetClipped;   // with clipping
+#endif
 
+#if !defined(WEBP_DISABLE_STATS)
 typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1,
                                          const uint8_t* src2, int len);
 extern VP8AccumulateSSEFunc VP8AccumulateSSE;
+#endif
 
 // must be called before using any of the above directly
 void VP8SSIMDspInit(void);
@@ -462,12 +503,12 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
 extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
 
 // Plain-C implementation, as fall-back.
-extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk,
-                                         const uint8_t* src);
-extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk,
-                                         const uint8_t* src);
-extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
-extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);
+extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
+                                          const uint8_t* src);
+extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
+                                          const uint8_t* src);
+extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
+extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
 
 // Main entry calls:
 extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
@@ -533,24 +574,21 @@ void WebPMultRows(uint8_t* ptr, int stride,
                   int width, int num_rows, int inverse);
 
 // Plain-C versions, used as fallback by some implementations.
-void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
-                  int width, int inverse);
-void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
-
-// To be called first before using the above.
-void WebPInitAlphaProcessing(void);
-
-// ARGB packing function: a/r/g/b input is rgba or bgra order.
-extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
-                           const uint8_t* g, const uint8_t* b, int len,
-                           uint32_t* out);
+void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
+                   int width, int inverse);
+void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
 
 // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
-extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
-                          int len, int step, uint32_t* out);
+extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
+                           int len, int step, uint32_t* out);
+
+// This function returns true if src[i] contains a value different from 0xff.
+extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
+// This function returns true if src[4*i] contains a value different from 0xff.
+extern int (*WebPHasAlpha32b)(const uint8_t* src, int length);
 
 // To be called first before using the above.
-void VP8EncDspARGBInit(void);
+void WebPInitAlphaProcessing(void);
 
 //------------------------------------------------------------------------------
 // Filter functions
diff --git a/thirdparty/libwebp/dsp/enc.c b/thirdparty/libwebp/src/dsp/enc.c
index f31bc6de18..1c807f1df7 100644
--- a/thirdparty/libwebp/dsp/enc.c
+++ b/thirdparty/libwebp/src/dsp/enc.c
@@ -14,16 +14,18 @@
 #include <assert.h>
 #include <stdlib.h>  // for abs()
 
-#include "./dsp.h"
-#include "../enc/vp8i_enc.h"
+#include "src/dsp/dsp.h"
+#include "src/enc/vp8i_enc.h"
 
 static WEBP_INLINE uint8_t clip_8b(int v) {
   return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int clip_max(int v, int max) {
   return (v > max) ? max : v;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // Compute susceptibility based on DCT-coeff histograms:
@@ -56,9 +58,10 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
   histo->last_non_zero = last_non_zero;
 }
 
-static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
-                             int start_block, int end_block,
-                             VP8Histogram* const histo) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
+                               int start_block, int end_block,
+                               VP8Histogram* const histo) {
   int j;
   int distribution[MAX_COEFF_THRESH + 1] = { 0 };
   for (j = start_block; j < end_block; ++j) {
@@ -76,6 +79,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
   }
   VP8SetHistogramData(distribution, histo);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // run-time tables (~4k)
@@ -100,6 +104,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
 
+#if !WEBP_NEON_OMIT_C_CODE
+
 #define STORE(x, y, v) \
   dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
 
@@ -140,15 +146,15 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
   }
 }
 
-static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-                       int do_two) {
+static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                         int do_two) {
   ITransformOne(ref, in, dst);
   if (do_two) {
     ITransformOne(ref + 4, in + 16, dst + 4);
   }
 }
 
-static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
   int i;
   int tmp[16];
   for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
@@ -176,13 +182,16 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
     out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
+                          int16_t* out) {
   VP8FTransform(src, ref, out);
   VP8FTransform(src + 4, ref + 4, out + 16);
 }
 
-static void FTransformWHT(const int16_t* in, int16_t* out) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void FTransformWHT_C(const int16_t* in, int16_t* out) {
   // input is 12b signed
   int32_t tmp[16];
   int i;
@@ -211,6 +220,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
     out[12 + i] = b3 >> 1;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 #undef MUL
 #undef STORE
@@ -303,8 +313,8 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
 //------------------------------------------------------------------------------
 // Chroma 8x8 prediction (paragraph 12.2)
 
-static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
-                             const uint8_t* top) {
+static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
+                               const uint8_t* top) {
   // U block
   DCMode(C8DC8 + dst, left, top, 8, 8, 4);
   VerticalPred(C8VE8 + dst, top, 8);
@@ -323,8 +333,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
 //------------------------------------------------------------------------------
 // luma 16x16 prediction (paragraph 12.3)
 
-static void Intra16Preds(uint8_t* dst,
-                         const uint8_t* left, const uint8_t* top) {
+static void Intra16Preds_C(uint8_t* dst,
+                           const uint8_t* left, const uint8_t* top) {
   DCMode(I16DC16 + dst, left, top, 16, 16, 5);
   VerticalPred(I16VE16 + dst, top, 16);
   HorizontalPred(I16HE16 + dst, left, 16);
@@ -507,7 +517,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
 
 // Left samples are top[-5 .. -2], top_left is top[-1], top are
 // located at top[0..3], and top right is top[4..7]
-static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
   DC4(I4DC4 + dst, top);
   TM4(I4TM4 + dst, top);
   VE4(I4VE4 + dst, top);
@@ -523,6 +533,7 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
 //------------------------------------------------------------------------------
 // Metric
 
+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
                               int w, int h) {
   int count = 0;
@@ -538,20 +549,21 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
   return count;
 }
 
-static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
   return GetSSE(a, b, 16, 16);
 }
-static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
   return GetSSE(a, b, 16, 8);
 }
-static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
   return GetSSE(a, b, 8, 8);
 }
-static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
   return GetSSE(a, b, 4, 4);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
+static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
   int k, x, y;
   for (k = 0; k < 4; ++k) {
     uint32_t avg = 0;
@@ -571,6 +583,7 @@ static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
 // We try to match the spectral content (weighted) between source and
 // reconstructed samples.
 
+#if !WEBP_NEON_OMIT_C_CODE
 // Hadamard transform
 // Returns the weighted sum of the absolute value of transformed coefficients.
 // w[] contains a row-major 4 by 4 symmetric matrix.
@@ -608,24 +621,25 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
   return sum;
 }
 
-static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
-                    const uint16_t* const w) {
+static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
   const int sum1 = TTransform(a, w);
   const int sum2 = TTransform(b, w);
   return abs(sum2 - sum1) >> 5;
 }
 
-static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
-                      const uint16_t* const w) {
+static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
+                        const uint16_t* const w) {
   int D = 0;
   int x, y;
   for (y = 0; y < 16 * BPS; y += 4 * BPS) {
     for (x = 0; x < 16; x += 4) {
-      D += Disto4x4(a + x + y, b + x + y, w);
+      D += Disto4x4_C(a + x + y, b + x + y, w);
     }
   }
   return D;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // Quantization
@@ -636,8 +650,8 @@ static const uint8_t kZigzag[16] = {
 };
 
 // Simple quantization
-static int QuantizeBlock(int16_t in[16], int16_t out[16],
-                         const VP8Matrix* const mtx) {
+static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
+                           const VP8Matrix* const mtx) {
   int last = -1;
   int n;
   for (n = 0; n < 16; ++n) {
@@ -662,13 +676,15 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
   return (last >= 0);
 }
 
-static int Quantize2Blocks(int16_t in[32], int16_t out[32],
-                           const VP8Matrix* const mtx) {
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
+                             const VP8Matrix* const mtx) {
   int nz;
   nz  = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
   nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
   return nz;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
 //------------------------------------------------------------------------------
 // Block copy
@@ -682,149 +698,15 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
   }
 }
 
-static void Copy4x4(const uint8_t* src, uint8_t* dst) {
+static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
   Copy(src, dst, 4, 4);
 }
 
-static void Copy16x8(const uint8_t* src, uint8_t* dst) {
+static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
   Copy(src, dst, 16, 8);
 }
 
 //------------------------------------------------------------------------------
-// SSIM / PSNR
-
-// hat-shaped filter. Sum of coefficients is equal to 16.
-static const uint32_t kWeight[2 * VP8_SSIM_KERNEL + 1] = {
-  1, 2, 3, 4, 3, 2, 1
-};
-static const uint32_t kWeightSum = 16 * 16;   // sum{kWeight}^2
-
-static WEBP_INLINE double SSIMCalculation(
-    const VP8DistoStats* const stats, uint32_t N  /*num samples*/) {
-  const uint32_t w2 =  N * N;
-  const uint32_t C1 = 20 * w2;
-  const uint32_t C2 = 60 * w2;
-  const uint32_t C3 = 8 * 8 * w2;   // 'dark' limit ~= 6
-  const uint64_t xmxm = (uint64_t)stats->xm * stats->xm;
-  const uint64_t ymym = (uint64_t)stats->ym * stats->ym;
-  if (xmxm + ymym >= C3) {
-    const int64_t xmym = (int64_t)stats->xm * stats->ym;
-    const int64_t sxy = (int64_t)stats->xym * N - xmym;    // can be negative
-    const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm;
-    const uint64_t syy = (uint64_t)stats->yym * N - ymym;
-    // we descale by 8 to prevent overflow during the fnum/fden multiply.
-    const uint64_t num_S = (2 * (uint64_t)(sxy < 0 ? 0 : sxy) + C2) >> 8;
-    const uint64_t den_S = (sxx + syy + C2) >> 8;
-    const uint64_t fnum = (2 * xmym + C1) * num_S;
-    const uint64_t fden = (xmxm + ymym + C1) * den_S;
-    const double r = (double)fnum / fden;
-    assert(r >= 0. && r <= 1.0);
-    return r;
-  }
-  return 1.;   // area is too dark to contribute meaningfully
-}
-
-double VP8SSIMFromStats(const VP8DistoStats* const stats) {
-  return SSIMCalculation(stats, kWeightSum);
-}
-
-double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats) {
-  return SSIMCalculation(stats, stats->w);
-}
-
-static double SSIMGetClipped_C(const uint8_t* src1, int stride1,
-                               const uint8_t* src2, int stride2,
-                               int xo, int yo, int W, int H) {
-  VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };
-  const int ymin = (yo - VP8_SSIM_KERNEL < 0) ? 0 : yo - VP8_SSIM_KERNEL;
-  const int ymax = (yo + VP8_SSIM_KERNEL > H - 1) ? H - 1
-                                                  : yo + VP8_SSIM_KERNEL;
-  const int xmin = (xo - VP8_SSIM_KERNEL < 0) ? 0 : xo - VP8_SSIM_KERNEL;
-  const int xmax = (xo + VP8_SSIM_KERNEL > W - 1) ? W - 1
-                                                  : xo + VP8_SSIM_KERNEL;
-  int x, y;
-  src1 += ymin * stride1;
-  src2 += ymin * stride2;
-  for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
-    for (x = xmin; x <= xmax; ++x) {
-      const uint32_t w = kWeight[VP8_SSIM_KERNEL + x - xo]
-                       * kWeight[VP8_SSIM_KERNEL + y - yo];
-      const uint32_t s1 = src1[x];
-      const uint32_t s2 = src2[x];
-      stats.w   += w;
-      stats.xm  += w * s1;
-      stats.ym  += w * s2;
-      stats.xxm += w * s1 * s1;
-      stats.xym += w * s1 * s2;
-      stats.yym += w * s2 * s2;
-    }
-  }
-  return VP8SSIMFromStatsClipped(&stats);
-}
-
-static double SSIMGet_C(const uint8_t* src1, int stride1,
-                        const uint8_t* src2, int stride2) {
-  VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };
-  int x, y;
-  for (y = 0; y <= 2 * VP8_SSIM_KERNEL; ++y, src1 += stride1, src2 += stride2) {
-    for (x = 0; x <= 2 * VP8_SSIM_KERNEL; ++x) {
-      const uint32_t w = kWeight[x] * kWeight[y];
-      const uint32_t s1 = src1[x];
-      const uint32_t s2 = src2[x];
-      stats.xm  += w * s1;
-      stats.ym  += w * s2;
-      stats.xxm += w * s1 * s1;
-      stats.xym += w * s1 * s2;
-      stats.yym += w * s2 * s2;
-    }
-  }
-  return VP8SSIMFromStats(&stats);
-}
-
-//------------------------------------------------------------------------------
-
-static uint32_t AccumulateSSE(const uint8_t* src1,
-                              const uint8_t* src2, int len) {
-  int i;
-  uint32_t sse2 = 0;
-  assert(len <= 65535);  // to ensure that accumulation fits within uint32_t
-  for (i = 0; i < len; ++i) {
-    const int32_t diff = src1[i] - src2[i];
-    sse2 += diff * diff;
-  }
-  return sse2;
-}
-
-//------------------------------------------------------------------------------
-
-VP8SSIMGetFunc VP8SSIMGet;
-VP8SSIMGetClippedFunc VP8SSIMGetClipped;
-VP8AccumulateSSEFunc VP8AccumulateSSE;
-
-extern void VP8SSIMDspInitSSE2(void);
-
-static volatile VP8CPUInfo ssim_last_cpuinfo_used =
-    (VP8CPUInfo)&ssim_last_cpuinfo_used;
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) {
-  if (ssim_last_cpuinfo_used == VP8GetCPUInfo) return;
-
-  VP8SSIMGetClipped = SSIMGetClipped_C;
-  VP8SSIMGet = SSIMGet_C;
-
-  VP8AccumulateSSE = AccumulateSSE;
-  if (VP8GetCPUInfo != NULL) {
-#if defined(WEBP_USE_SSE2)
-    if (VP8GetCPUInfo(kSSE2)) {
-      VP8SSIMDspInitSSE2();
-    }
-#endif
-  }
-
-  ssim_last_cpuinfo_used = VP8GetCPUInfo;
-}
-
-//------------------------------------------------------------------------------
 // Initialization
 
 // Speed-critical function pointers. We have to initialize them to the default
@@ -868,26 +750,32 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
   InitTables();
 
   // default C implementations
-  VP8CollectHistogram = CollectHistogram;
-  VP8ITransform = ITransform;
-  VP8FTransform = FTransform;
-  VP8FTransform2 = FTransform2;
-  VP8FTransformWHT = FTransformWHT;
-  VP8EncPredLuma4 = Intra4Preds;
-  VP8EncPredLuma16 = Intra16Preds;
-  VP8EncPredChroma8 = IntraChromaPreds;
-  VP8SSE16x16 = SSE16x16;
-  VP8SSE8x8 = SSE8x8;
-  VP8SSE16x8 = SSE16x8;
-  VP8SSE4x4 = SSE4x4;
-  VP8TDisto4x4 = Disto4x4;
-  VP8TDisto16x16 = Disto16x16;
-  VP8Mean16x4 = Mean16x4;
-  VP8EncQuantizeBlock = QuantizeBlock;
-  VP8EncQuantize2Blocks = Quantize2Blocks;
-  VP8EncQuantizeBlockWHT = QuantizeBlock;
-  VP8Copy4x4 = Copy4x4;
-  VP8Copy16x8 = Copy16x8;
+#if !WEBP_NEON_OMIT_C_CODE
+  VP8ITransform = ITransform_C;
+  VP8FTransform = FTransform_C;
+  VP8FTransformWHT = FTransformWHT_C;
+  VP8TDisto4x4 = Disto4x4_C;
+  VP8TDisto16x16 = Disto16x16_C;
+  VP8CollectHistogram = CollectHistogram_C;
+  VP8SSE16x16 = SSE16x16_C;
+  VP8SSE16x8 = SSE16x8_C;
+  VP8SSE8x8 = SSE8x8_C;
+  VP8SSE4x4 = SSE4x4_C;
+#endif
+
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+  VP8EncQuantizeBlock = QuantizeBlock_C;
+  VP8EncQuantize2Blocks = Quantize2Blocks_C;
+#endif
+
+  VP8FTransform2 = FTransform2_C;
+  VP8EncPredLuma4 = Intra4Preds_C;
+  VP8EncPredLuma16 = Intra16Preds_C;
+  VP8EncPredChroma8 = IntraChromaPreds_C;
+  VP8Mean16x4 = Mean16x4_C;
+  VP8EncQuantizeBlockWHT = QuantizeBlock_C;
+  VP8Copy4x4 = Copy4x4_C;
+  VP8Copy16x8 = Copy16x8_C;
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
@@ -906,11 +794,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
       VP8EncDspInitAVX2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8EncDspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS32)
     if (VP8GetCPUInfo(kMIPS32)) {
       VP8EncDspInitMIPS32();
@@ -927,5 +810,34 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8EncDspInitNEON();
+  }
+#endif
+
+  assert(VP8ITransform != NULL);
+  assert(VP8FTransform != NULL);
+  assert(VP8FTransformWHT != NULL);
+  assert(VP8TDisto4x4 != NULL);
+  assert(VP8TDisto16x16 != NULL);
+  assert(VP8CollectHistogram != NULL);
+  assert(VP8SSE16x16 != NULL);
+  assert(VP8SSE16x8 != NULL);
+  assert(VP8SSE8x8 != NULL);
+  assert(VP8SSE4x4 != NULL);
+  assert(VP8EncQuantizeBlock != NULL);
+  assert(VP8EncQuantize2Blocks != NULL);
+  assert(VP8FTransform2 != NULL);
+  assert(VP8EncPredLuma4 != NULL);
+  assert(VP8EncPredLuma16 != NULL);
+  assert(VP8EncPredChroma8 != NULL);
+  assert(VP8Mean16x4 != NULL);
+  assert(VP8EncQuantizeBlockWHT != NULL);
+  assert(VP8Copy4x4 != NULL);
+  assert(VP8Copy16x8 != NULL);
+
   enc_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/thirdparty/libwebp/dsp/enc_avx2.c b/thirdparty/libwebp/src/dsp/enc_avx2.c
index 93efb30b10..8bc5798fee 100644
--- a/thirdparty/libwebp/dsp/enc_avx2.c
+++ b/thirdparty/libwebp/src/dsp/enc_avx2.c
@@ -9,7 +9,7 @@
 //
 // AVX2 version of speed-critical encoding functions.
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_AVX2)
 
diff --git a/thirdparty/libwebp/dsp/enc_mips32.c b/thirdparty/libwebp/src/dsp/enc_mips32.c
index 752b14daf6..618f0fc0ee 100644
--- a/thirdparty/libwebp/dsp/enc_mips32.c
+++ b/thirdparty/libwebp/src/dsp/enc_mips32.c
@@ -13,13 +13,13 @@
 //            Jovan Zelincevic (jovan.zelincevic@imgtec.com)
 //            Slobodan Prijic  (slobodan.prijic@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS32)
 
-#include "./mips_macro.h"
-#include "../enc/vp8i_enc.h"
-#include "../enc/cost_enc.h"
+#include "src/dsp/mips_macro.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/enc/cost_enc.h"
 
 static const int kC1 = 20091 + (1 << 16);
 static const int kC2 = 35468;
@@ -113,8 +113,9 @@ static const int kC2 = 35468;
   "sb      %[" #TEMP12 "],   3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
 
 // Does one or two inverse transforms.
-static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
-                                      uint8_t* dst) {
+static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
+                                             const int16_t* in,
+                                             uint8_t* dst) {
   int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
   int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
   int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
@@ -144,11 +145,11 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
   );
 }
 
-static void ITransform(const uint8_t* ref, const int16_t* in,
-                       uint8_t* dst, int do_two) {
-  ITransformOne(ref, in, dst);
+static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
+                              uint8_t* dst, int do_two) {
+  ITransformOne_MIPS32(ref, in, dst);
   if (do_two) {
-    ITransformOne(ref + 4, in + 16, dst + 4);
+    ITransformOne_MIPS32(ref + 4, in + 16, dst + 4);
   }
 }
 
@@ -187,8 +188,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
   "sh           %[temp5],       " #J "(%[ppin])                     \n\t"   \
   "sh           %[level],       " #N "(%[pout])                     \n\t"
 
-static int QuantizeBlock(int16_t in[16], int16_t out[16],
-                         const VP8Matrix* const mtx) {
+static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
+                                const VP8Matrix* const mtx) {
   int temp0, temp1, temp2, temp3, temp4, temp5;
   int sign, coeff, level, i;
   int max_level = MAX_LEVEL;
@@ -238,11 +239,11 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
   return 0;
 }
 
-static int Quantize2Blocks(int16_t in[32], int16_t out[32],
-                           const VP8Matrix* const mtx) {
+static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
+                                  const VP8Matrix* const mtx) {
   int nz;
-  nz  = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
-  nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
+  nz  = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
   return nz;
 }
 
@@ -361,8 +362,8 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
   "msub   %[temp6],  %[temp0]                \n\t"                \
   "msub   %[temp7],  %[temp1]                \n\t"
 
-static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
-                    const uint16_t* const w) {
+static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
+                           const uint16_t* const w) {
   int tmp[32];
   int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
 
@@ -396,13 +397,13 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
 #undef VERTICAL_PASS
 #undef HORIZONTAL_PASS
 
-static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
-                      const uint16_t* const w) {
+static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
+                             const uint16_t* const w) {
   int D = 0;
   int x, y;
   for (y = 0; y < 16 * BPS; y += 4 * BPS) {
     for (x = 0; x < 16; x += 4) {
-      D += Disto4x4(a + x + y, b + x + y, w);
+      D += Disto4x4_MIPS32(a + x + y, b + x + y, w);
     }
   }
   return D;
@@ -478,7 +479,8 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
   "sh     %[" #TEMP8 "],  " #D "(%[temp20])              \n\t"    \
   "sh     %[" #TEMP12 "], " #B "(%[temp20])              \n\t"
 
-static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
+                              int16_t* out) {
   int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
   int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
   int temp17, temp18, temp19, temp20;
@@ -539,7 +541,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
   GET_SSE_INNER(C, C + 1, C + 2, C + 3)   \
   GET_SSE_INNER(D, D + 1, D + 2, D + 3)
 
-static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
   int count;
   int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
 
@@ -573,7 +575,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
   return count;
 }
 
-static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
   int count;
   int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
 
@@ -599,7 +601,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
   return count;
 }
 
-static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
   int count;
   int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
 
@@ -621,7 +623,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
   return count;
 }
 
-static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
   int count;
   int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
 
@@ -651,17 +653,20 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
 extern void VP8EncDspInitMIPS32(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPS32(void) {
-  VP8ITransform = ITransform;
-  VP8FTransform = FTransform;
-  VP8EncQuantizeBlock = QuantizeBlock;
-  VP8EncQuantize2Blocks = Quantize2Blocks;
-  VP8TDisto4x4 = Disto4x4;
-  VP8TDisto16x16 = Disto16x16;
+  VP8ITransform = ITransform_MIPS32;
+  VP8FTransform = FTransform_MIPS32;
+
+  VP8EncQuantizeBlock = QuantizeBlock_MIPS32;
+  VP8EncQuantize2Blocks = Quantize2Blocks_MIPS32;
+
+  VP8TDisto4x4 = Disto4x4_MIPS32;
+  VP8TDisto16x16 = Disto16x16_MIPS32;
+
 #if !defined(WORK_AROUND_GCC)
-  VP8SSE16x16 = SSE16x16;
-  VP8SSE8x8 = SSE8x8;
-  VP8SSE16x8 = SSE16x8;
-  VP8SSE4x4 = SSE4x4;
+  VP8SSE16x16 = SSE16x16_MIPS32;
+  VP8SSE8x8 = SSE8x8_MIPS32;
+  VP8SSE16x8 = SSE16x8_MIPS32;
+  VP8SSE4x4 = SSE4x4_MIPS32;
 #endif
 }
 
diff --git a/thirdparty/libwebp/dsp/enc_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/enc_mips_dsp_r2.c
index 6c8c1c6acd..9ddd895086 100644
--- a/thirdparty/libwebp/dsp/enc_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/enc_mips_dsp_r2.c
@@ -12,13 +12,13 @@
 // Author(s): Darko Laus (darko.laus@imgtec.com)
 //            Mirko Raus (mirko.raus@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS_DSP_R2)
 
-#include "./mips_macro.h"
-#include "../enc/cost_enc.h"
-#include "../enc/vp8i_enc.h"
+#include "src/dsp/mips_macro.h"
+#include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"
 
 static const int kC1 = 20091 + (1 << 16);
 static const int kC2 = 35468;
@@ -141,7 +141,8 @@ static const int kC2 = 35468;
   "sh              %[" #TEMP8 "],   " #D "(%[temp20])               \n\t"      \
   "sh              %[" #TEMP12 "],  " #B "(%[temp20])               \n\t"
 
-static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
+                                 int16_t* out) {
   const int c2217 = 2217;
   const int c5352 = 5352;
   int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
@@ -238,16 +239,16 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
   );
 }
 
-static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-                       int do_two) {
+static void ITransform_MIPSdspR2(const uint8_t* ref, const int16_t* in,
+                                 uint8_t* dst, int do_two) {
   ITransformOne(ref, in, dst);
   if (do_two) {
     ITransformOne(ref + 4, in + 16, dst + 4);
   }
 }
 
-static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
-                    const uint16_t* const w) {
+static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
+                              const uint16_t* const w) {
   int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
   int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
 
@@ -313,13 +314,14 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
   return abs(temp3 - temp17) >> 5;
 }
 
-static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
-                      const uint16_t* const w) {
+static int Disto16x16_MIPSdspR2(const uint8_t* const a,
+                                const uint8_t* const b,
+                                const uint16_t* const w) {
   int D = 0;
   int x, y;
   for (y = 0; y < 16 * BPS; y += 4 * BPS) {
     for (x = 0; x < 16; x += 4) {
-      D += Disto4x4(a + x + y, b + x + y, w);
+      D += Disto4x4_MIPSdspR2(a + x + y, b + x + y, w);
     }
   }
   return D;
@@ -1011,8 +1013,8 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
 //------------------------------------------------------------------------------
 // Chroma 8x8 prediction (paragraph 12.2)
 
-static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
-                             const uint8_t* top) {
+static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
+                                       const uint8_t* top) {
   // U block
   DCMode8(C8DC8 + dst, left, top);
   VerticalPred8(C8VE8 + dst, top);
@@ -1031,8 +1033,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
 //------------------------------------------------------------------------------
 // luma 16x16 prediction (paragraph 12.3)
 
-static void Intra16Preds(uint8_t* dst,
-                         const uint8_t* left, const uint8_t* top) {
+static void Intra16Preds_MIPSdspR2(uint8_t* dst,
+                                   const uint8_t* left, const uint8_t* top) {
   DCMode16(I16DC16 + dst, left, top);
   VerticalPred16(I16VE16 + dst, top);
   HorizontalPred16(I16HE16 + dst, left);
@@ -1041,7 +1043,7 @@ static void Intra16Preds(uint8_t* dst,
 
 // Left samples are top[-5 .. -2], top_left is top[-1], top are
 // located at top[0..3], and top right is top[4..7]
-static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
   DC4(I4DC4 + dst, top);
   TM4(I4TM4 + dst, top);
   VE4(I4VE4 + dst, top);
@@ -1077,7 +1079,7 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
   GET_SSE_INNER(C)                        \
   GET_SSE_INNER(D)
 
-static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
   int count;
   int temp0, temp1, temp2, temp3;
   __asm__ volatile (
@@ -1107,7 +1109,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
   return count;
 }
 
-static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
   int count;
   int temp0, temp1, temp2, temp3;
   __asm__ volatile (
@@ -1129,7 +1131,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
   return count;
 }
 
-static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
   int count;
   int temp0, temp1, temp2, temp3;
   __asm__ volatile (
@@ -1147,7 +1149,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
   return count;
 }
 
-static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
   int count;
   int temp0, temp1, temp2, temp3;
   __asm__ volatile (
@@ -1270,8 +1272,8 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
   "usw         $0,           " #J "(%[ppin])                 \n\t"        \
 "3:                                                          \n\t"
 
-static int QuantizeBlock(int16_t in[16], int16_t out[16],
-                         const VP8Matrix* const mtx) {
+static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
+                                   const VP8Matrix* const mtx) {
   int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
   int sign, coeff, level;
   int max_level = MAX_LEVEL;
@@ -1311,11 +1313,11 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
   return (ret != 0);
 }
 
-static int Quantize2Blocks(int16_t in[32], int16_t out[32],
-                           const VP8Matrix* const mtx) {
+static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
+                                     const VP8Matrix* const mtx) {
   int nz;
-  nz  = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
-  nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
+  nz  = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
   return nz;
 }
 
@@ -1358,7 +1360,7 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
   "usw             %[" #TEMP4 "],  " #C "(%[out])                 \n\t"        \
   "usw             %[" #TEMP6 "],  " #D "(%[out])                 \n\t"
 
-static void FTransformWHT(const int16_t* in, int16_t* out) {
+static void FTransformWHT_MIPSdspR2(const int16_t* in, int16_t* out) {
   int temp0, temp1, temp2, temp3, temp4;
   int temp5, temp6, temp7, temp8, temp9;
 
@@ -1450,9 +1452,9 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
   "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
   "sw         %[temp8],  0(%[temp3])                   \n\t"
 
-static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
-                             int start_block, int end_block,
-                             VP8Histogram* const histo) {
+static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
+                                       int start_block, int end_block,
+                                       VP8Histogram* const histo) {
   int j;
   int distribution[MAX_COEFF_THRESH + 1] = { 0 };
   const int max_coeff = (MAX_COEFF_THRESH << 16) + MAX_COEFF_THRESH;
@@ -1484,23 +1486,28 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
 extern void VP8EncDspInitMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void) {
-  VP8FTransform = FTransform;
-  VP8ITransform = ITransform;
-  VP8TDisto4x4 = Disto4x4;
-  VP8TDisto16x16 = Disto16x16;
-  VP8EncPredLuma16 = Intra16Preds;
-  VP8EncPredChroma8 = IntraChromaPreds;
-  VP8EncPredLuma4 = Intra4Preds;
+  VP8FTransform = FTransform_MIPSdspR2;
+  VP8FTransformWHT = FTransformWHT_MIPSdspR2;
+  VP8ITransform = ITransform_MIPSdspR2;
+
+  VP8TDisto4x4 = Disto4x4_MIPSdspR2;
+  VP8TDisto16x16 = Disto16x16_MIPSdspR2;
+
+  VP8EncPredLuma16 = Intra16Preds_MIPSdspR2;
+  VP8EncPredChroma8 = IntraChromaPreds_MIPSdspR2;
+  VP8EncPredLuma4 = Intra4Preds_MIPSdspR2;
+
 #if !defined(WORK_AROUND_GCC)
-  VP8SSE16x16 = SSE16x16;
-  VP8SSE8x8 = SSE8x8;
-  VP8SSE16x8 = SSE16x8;
-  VP8SSE4x4 = SSE4x4;
+  VP8SSE16x16 = SSE16x16_MIPSdspR2;
+  VP8SSE8x8 = SSE8x8_MIPSdspR2;
+  VP8SSE16x8 = SSE16x8_MIPSdspR2;
+  VP8SSE4x4 = SSE4x4_MIPSdspR2;
 #endif
-  VP8EncQuantizeBlock = QuantizeBlock;
-  VP8EncQuantize2Blocks = Quantize2Blocks;
-  VP8FTransformWHT = FTransformWHT;
-  VP8CollectHistogram = CollectHistogram;
+
+  VP8EncQuantizeBlock = QuantizeBlock_MIPSdspR2;
+  VP8EncQuantize2Blocks = Quantize2Blocks_MIPSdspR2;
+
+  VP8CollectHistogram = CollectHistogram_MIPSdspR2;
 }
 
 #else  // !WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/dsp/enc_msa.c b/thirdparty/libwebp/src/dsp/enc_msa.c
index 909b46d5d9..6f85add4bb 100644
--- a/thirdparty/libwebp/dsp/enc_msa.c
+++ b/thirdparty/libwebp/src/dsp/enc_msa.c
@@ -11,13 +11,13 @@
 //
 // Author:  Prashant Patil   (prashant.patil@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MSA)
 
 #include <stdlib.h>
-#include "./msa_macro.h"
-#include "../enc/vp8i_enc.h"
+#include "src/dsp/msa_macro.h"
+#include "src/enc/vp8i_enc.h"
 
 //------------------------------------------------------------------------------
 // Transforms
@@ -69,20 +69,21 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
   ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
 }
 
-static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-                       int do_two) {
+static void ITransform_MSA(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                           int do_two) {
   ITransformOne(ref, in, dst);
   if (do_two) {
     ITransformOne(ref + 4, in + 16, dst + 4);
   }
 }
 
-static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
+                           int16_t* out) {
   uint64_t out0, out1, out2, out3;
   uint32_t in0, in1, in2, in3;
   v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
   v8i16 t0, t1, t2, t3;
-  v16u8 srcl0, srcl1, src0, src1;
+  v16u8 srcl0, srcl1, src0 = { 0 }, src1 = { 0 };
   const v8i16 mask0 = { 0, 4, 8, 12, 1, 5, 9, 13 };
   const v8i16 mask1 = { 3, 7, 11, 15, 2, 6, 10, 14 };
   const v8i16 mask2 = { 4, 0, 5, 1, 6, 2, 7, 3 };
@@ -130,7 +131,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
   SD4(out0, out1, out2, out3, out, 8);
 }
 
-static void FTransformWHT(const int16_t* in, int16_t* out) {
+static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
   v8i16 in0 = { 0 };
   v8i16 in1 = { 0 };
   v8i16 tmp0, tmp1, tmp2, tmp3;
@@ -167,10 +168,10 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
   ST_SH2(out0, out1, out, 8);
 }
 
-static int TTransform(const uint8_t* in, const uint16_t* w) {
+static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
   int sum;
   uint32_t in0_m, in1_m, in2_m, in3_m;
-  v16i8 src0;
+  v16i8 src0 = { 0 };
   v8i16 in0, in1, tmp0, tmp1, tmp2, tmp3;
   v4i32 dst0, dst1;
   const v16i8 zero = { 0 };
@@ -199,20 +200,20 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
   return sum;
 }
 
-static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
-                    const uint16_t* const w) {
-  const int sum1 = TTransform(a, w);
-  const int sum2 = TTransform(b, w);
+static int Disto4x4_MSA(const uint8_t* const a, const uint8_t* const b,
+                        const uint16_t* const w) {
+  const int sum1 = TTransform_MSA(a, w);
+  const int sum2 = TTransform_MSA(b, w);
   return abs(sum2 - sum1) >> 5;
 }
 
-static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
-                      const uint16_t* const w) {
+static int Disto16x16_MSA(const uint8_t* const a, const uint8_t* const b,
+                          const uint16_t* const w) {
   int D = 0;
   int x, y;
   for (y = 0; y < 16 * BPS; y += 4 * BPS) {
     for (x = 0; x < 16; x += 4) {
-      D += Disto4x4(a + x + y, b + x + y, w);
+      D += Disto4x4_MSA(a + x + y, b + x + y, w);
     }
   }
   return D;
@@ -221,9 +222,9 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
 //------------------------------------------------------------------------------
 // Histogram
 
-static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
-                             int start_block, int end_block,
-                             VP8Histogram* const histo) {
+static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
+                                 int start_block, int end_block,
+                                 VP8Histogram* const histo) {
   int j;
   int distribution[MAX_COEFF_THRESH + 1] = { 0 };
   for (j = start_block; j < end_block; ++j) {
@@ -259,8 +260,9 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
 #define AVG2(a, b) (((a) + (b) + 1) >> 1)
 
 static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) {    // vertical
+  const v16u8 A1 = { 0 };
   const uint64_t val_m = LD(top - 1);
-  const v16u8 A = (v16u8)__msa_insert_d((v2i64)A, 0, val_m);
+  const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
   const v16u8 B = SLDI_UB(A, A, 1);
   const v16u8 C = SLDI_UB(A, A, 2);
   const v16u8 AC = __msa_ave_u_b(A, C);
@@ -292,8 +294,9 @@ static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
 }
 
 static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
+  const v16u8 A2 = { 0 };
   const uint64_t val_m = LD(top - 5);
-  const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
+  const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
   const v16u8 A = (v16u8)__msa_insert_b((v16i8)A1, 8, top[3]);
   const v16u8 B = SLDI_UB(A, A, 1);
   const v16u8 C = SLDI_UB(A, A, 2);
@@ -311,8 +314,9 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
 }
 
 static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
+  const v16u8 A1 = { 0 };
   const uint64_t val_m = LD(top);
-  const v16u8 A = (v16u8)__msa_insert_d((v2i64)A, 0, val_m);
+  const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
   const v16u8 B = SLDI_UB(A, A, 1);
   const v16u8 C1 = SLDI_UB(A, A, 2);
   const v16u8 C = (v16u8)__msa_insert_b((v16i8)C1, 6, top[7]);
@@ -427,7 +431,7 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
 #undef AVG3
 #undef AVG2
 
-static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
   DC4(I4DC4 + dst, top);
   TM4(I4TM4 + dst, top);
   VE4(I4VE4 + dst, top);
@@ -544,8 +548,8 @@ static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
   STORE16x16(out, dst);
 }
 
-static void Intra16Preds(uint8_t* dst,
-                         const uint8_t* left, const uint8_t* top) {
+static void Intra16Preds_MSA(uint8_t* dst,
+                             const uint8_t* left, const uint8_t* top) {
   DCMode16x16(I16DC16 + dst, left, top);
   VerticalPred16x16(I16VE16 + dst, top);
   HorizontalPred16x16(I16HE16 + dst, left);
@@ -645,7 +649,7 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
 static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
                                   const uint8_t* top) {
   uint64_t out;
-  v16u8 src;
+  v16u8 src = { 0 };
   if (top != NULL && left != NULL) {
     const uint64_t left_m = LD(left);
     const uint64_t top_m = LD(top);
@@ -666,8 +670,8 @@ static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
   STORE8x8(out, dst);
 }
 
-static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
-                             const uint8_t* top) {
+static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
+                                 const uint8_t* top) {
   // U block
   DCMode8x8(C8DC8 + dst, left, top);
   VerticalPred8x8(C8VE8 + dst, top);
@@ -708,7 +712,7 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
   DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3);                         \
 } while (0)
 
-static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
   uint32_t sum;
   v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
   v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
@@ -735,7 +739,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
   return sum;
 }
 
-static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
   uint32_t sum;
   v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
   v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
@@ -754,7 +758,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
   return sum;
 }
 
-static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
   uint32_t sum;
   v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
   v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
@@ -774,10 +778,10 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
   return sum;
 }
 
-static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
   uint32_t sum = 0;
   uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
-  v16u8 src, ref, tmp0, tmp1;
+  v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
   v8i16 diff0, diff1;
   v4i32 out0, out1;
 
@@ -796,8 +800,8 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
 //------------------------------------------------------------------------------
 // Quantization
 
-static int QuantizeBlock(int16_t in[16], int16_t out[16],
-                         const VP8Matrix* const mtx) {
+static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
+                             const VP8Matrix* const mtx) {
   int sum;
   v8i16 in0, in1, sh0, sh1, out0, out1;
   v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
@@ -828,7 +832,7 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
   tmp1 = (tmp3 > maxlevel);
   tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)maxlevel, (v16u8)tmp0);
   tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)maxlevel, (v16u8)tmp1);
-  SUB2(0, tmp2, 0, tmp3, tmp0, tmp1);
+  SUB2(zero, tmp2, zero, tmp3, tmp0, tmp1);
   tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)tmp0, (v16u8)sign0);
   tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)tmp1, (v16u8)sign1);
   LD_SW4(&mtx->zthresh_[0], 4, t0, t1, t2, t3);   // zthresh
@@ -849,8 +853,8 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
   return (sum > 0);
 }
 
-static int Quantize2Blocks(int16_t in[32], int16_t out[32],
-                           const VP8Matrix* const mtx) {
+static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
+                               const VP8Matrix* const mtx) {
   int nz;
   nz  = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
   nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
@@ -863,26 +867,26 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
 extern void VP8EncDspInitMSA(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMSA(void) {
-  VP8ITransform = ITransform;
-  VP8FTransform = FTransform;
-  VP8FTransformWHT = FTransformWHT;
-
-  VP8TDisto4x4 = Disto4x4;
-  VP8TDisto16x16 = Disto16x16;
-  VP8CollectHistogram = CollectHistogram;
-
-  VP8EncPredLuma4 = Intra4Preds;
-  VP8EncPredLuma16 = Intra16Preds;
-  VP8EncPredChroma8 = IntraChromaPreds;
-
-  VP8SSE16x16 = SSE16x16;
-  VP8SSE16x8 = SSE16x8;
-  VP8SSE8x8 = SSE8x8;
-  VP8SSE4x4 = SSE4x4;
-
-  VP8EncQuantizeBlock = QuantizeBlock;
-  VP8EncQuantize2Blocks = Quantize2Blocks;
-  VP8EncQuantizeBlockWHT = QuantizeBlock;
+  VP8ITransform = ITransform_MSA;
+  VP8FTransform = FTransform_MSA;
+  VP8FTransformWHT = FTransformWHT_MSA;
+
+  VP8TDisto4x4 = Disto4x4_MSA;
+  VP8TDisto16x16 = Disto16x16_MSA;
+  VP8CollectHistogram = CollectHistogram_MSA;
+
+  VP8EncPredLuma4 = Intra4Preds_MSA;
+  VP8EncPredLuma16 = Intra16Preds_MSA;
+  VP8EncPredChroma8 = IntraChromaPreds_MSA;
+
+  VP8SSE16x16 = SSE16x16_MSA;
+  VP8SSE16x8 = SSE16x8_MSA;
+  VP8SSE8x8 = SSE8x8_MSA;
+  VP8SSE4x4 = SSE4x4_MSA;
+
+  VP8EncQuantizeBlock = QuantizeBlock_MSA;
+  VP8EncQuantize2Blocks = Quantize2Blocks_MSA;
+  VP8EncQuantizeBlockWHT = QuantizeBlock_MSA;
 }
 
 #else  // !WEBP_USE_MSA
diff --git a/thirdparty/libwebp/dsp/enc_neon.c b/thirdparty/libwebp/src/dsp/enc_neon.c
index 6a078d632d..43bf1245c5 100644
--- a/thirdparty/libwebp/dsp/enc_neon.c
+++ b/thirdparty/libwebp/src/dsp/enc_neon.c
@@ -11,14 +11,14 @@
 //
 // adapted from libvpx (http://www.webmproject.org/code/)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_NEON)
 
 #include <assert.h>
 
-#include "./neon.h"
-#include "../enc/vp8i_enc.h"
+#include "src/dsp/neon.h"
+#include "src/enc/vp8i_enc.h"
 
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
@@ -37,15 +37,15 @@ static const int16_t kC2 = 17734;  // half of kC2, actually. See comment above.
 #if defined(WEBP_USE_INTRINSICS)
 
 // Treats 'v' as an uint8x8_t and zero extends to an int16x8_t.
-static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) {
+static WEBP_INLINE int16x8_t ConvertU8ToS16_NEON(uint32x2_t v) {
   return vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(v)));
 }
 
 // Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
 // to the corresponding rows of 'dst'.
-static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
-                                            const int16x8_t dst01,
-                                            const int16x8_t dst23) {
+static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
+                                                 const int16x8_t dst01,
+                                                 const int16x8_t dst23) {
   // Unsigned saturate to 8b.
   const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
   const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
@@ -57,8 +57,10 @@ static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
   vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
 }
 
-static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
-                               const uint8_t* const ref, uint8_t* const dst) {
+static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
+                                    const int16x8_t row23,
+                                    const uint8_t* const ref,
+                                    uint8_t* const dst) {
   uint32x2_t dst01 = vdup_n_u32(0);
   uint32x2_t dst23 = vdup_n_u32(0);
 
@@ -70,19 +72,20 @@ static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
 
   {
     // Convert to 16b.
-    const int16x8_t dst01_s16 = ConvertU8ToS16(dst01);
-    const int16x8_t dst23_s16 = ConvertU8ToS16(dst23);
+    const int16x8_t dst01_s16 = ConvertU8ToS16_NEON(dst01);
+    const int16x8_t dst23_s16 = ConvertU8ToS16_NEON(dst23);
 
     // Descale with rounding.
     const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3);
     const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3);
     // Add the inverse transform.
-    SaturateAndStore4x4(dst, out01, out23);
+    SaturateAndStore4x4_NEON(dst, out01, out23);
   }
 }
 
-static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
-                                     int16x8x2_t* const out) {
+static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
+                                          const int16x8_t in1,
+                                          int16x8x2_t* const out) {
   // a0 a1 a2 a3 | b0 b1 b2 b3   => a0 b0 c0 d0 | a1 b1 c1 d1
   // c0 c1 c2 c3 | d0 d1 d2 d3      a2 b2 c2 d2 | a3 b3 c3 d3
   const int16x8x2_t tmp0 = vzipq_s16(in0, in1);   // a0 c0 a1 c1 a2 c2 ...
@@ -90,7 +93,7 @@ static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
   *out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
 }
 
-static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
+static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
   // {rows} = in0 | in4
   //          in8 | in12
   // B1 = in4 | in12
@@ -113,22 +116,22 @@ static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
   const int16x8_t E0 = vqaddq_s16(D0, D1);      // a+d | b+c
   const int16x8_t E_tmp = vqsubq_s16(D0, D1);   // a-d | b-c
   const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
-  Transpose8x2(E0, E1, rows);
+  Transpose8x2_NEON(E0, E1, rows);
 }
 
-static void ITransformOne(const uint8_t* ref,
-                          const int16_t* in, uint8_t* dst) {
+static void ITransformOne_NEON(const uint8_t* ref,
+                               const int16_t* in, uint8_t* dst) {
   int16x8x2_t rows;
   INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
-  TransformPass(&rows);
-  TransformPass(&rows);
-  Add4x4(rows.val[0], rows.val[1], ref, dst);
+  TransformPass_NEON(&rows);
+  TransformPass_NEON(&rows);
+  Add4x4_NEON(rows.val[0], rows.val[1], ref, dst);
 }
 
 #else
 
-static void ITransformOne(const uint8_t* ref,
-                          const int16_t* in, uint8_t* dst) {
+static void ITransformOne_NEON(const uint8_t* ref,
+                               const int16_t* in, uint8_t* dst) {
   const int kBPS = BPS;
   const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
 
@@ -243,16 +246,16 @@ static void ITransformOne(const uint8_t* ref,
 
 #endif    // WEBP_USE_INTRINSICS
 
-static void ITransform(const uint8_t* ref,
-                       const int16_t* in, uint8_t* dst, int do_two) {
-  ITransformOne(ref, in, dst);
+static void ITransform_NEON(const uint8_t* ref,
+                            const int16_t* in, uint8_t* dst, int do_two) {
+  ITransformOne_NEON(ref, in, dst);
   if (do_two) {
-    ITransformOne(ref + 4, in + 16, dst + 4);
+    ITransformOne_NEON(ref + 4, in + 16, dst + 4);
   }
 }
 
 // Load all 4x4 pixels into a single uint8x16_t variable.
-static uint8x16_t Load4x4(const uint8_t* src) {
+static uint8x16_t Load4x4_NEON(const uint8_t* src) {
   uint32x4_t out = vdupq_n_u32(0);
   out = vld1q_lane_u32((const uint32_t*)(src + 0 * BPS), out, 0);
   out = vld1q_lane_u32((const uint32_t*)(src + 1 * BPS), out, 1);
@@ -265,10 +268,12 @@ static uint8x16_t Load4x4(const uint8_t* src) {
 
 #if defined(WEBP_USE_INTRINSICS)
 
-static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
-                                         const int16x4_t C, const int16x4_t D,
-                                         int16x8_t* const out01,
-                                         int16x8_t* const out32) {
+static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A,
+                                              const int16x4_t B,
+                                              const int16x4_t C,
+                                              const int16x4_t D,
+                                              int16x8_t* const out01,
+                                              int16x8_t* const out32) {
   const int16x4x2_t AB = vtrn_s16(A, B);
   const int16x4x2_t CD = vtrn_s16(C, D);
   const int32x2x2_t tmp02 = vtrn_s32(vreinterpret_s32_s16(AB.val[0]),
@@ -283,24 +288,24 @@ static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
                    vreinterpret_s64_s32(tmp02.val[1])));
 }
 
-static WEBP_INLINE int16x8_t DiffU8ToS16(const uint8x8_t a,
-                                         const uint8x8_t b) {
+static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
+                                              const uint8x8_t b) {
   return vreinterpretq_s16_u16(vsubl_u8(a, b));
 }
 
-static void FTransform(const uint8_t* src, const uint8_t* ref,
-                       int16_t* out) {
+static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
+                            int16_t* out) {
   int16x8_t d0d1, d3d2;   // working 4x4 int16 variables
   {
-    const uint8x16_t S0 = Load4x4(src);
-    const uint8x16_t R0 = Load4x4(ref);
-    const int16x8_t D0D1 = DiffU8ToS16(vget_low_u8(S0), vget_low_u8(R0));
-    const int16x8_t D2D3 = DiffU8ToS16(vget_high_u8(S0), vget_high_u8(R0));
+    const uint8x16_t S0 = Load4x4_NEON(src);
+    const uint8x16_t R0 = Load4x4_NEON(ref);
+    const int16x8_t D0D1 = DiffU8ToS16_NEON(vget_low_u8(S0), vget_low_u8(R0));
+    const int16x8_t D2D3 = DiffU8ToS16_NEON(vget_high_u8(S0), vget_high_u8(R0));
     const int16x4_t D0 = vget_low_s16(D0D1);
     const int16x4_t D1 = vget_high_s16(D0D1);
     const int16x4_t D2 = vget_low_s16(D2D3);
     const int16x4_t D3 = vget_high_s16(D2D3);
-    Transpose4x4_S16(D0, D1, D2, D3, &d0d1, &d3d2);
+    Transpose4x4_S16_NEON(D0, D1, D2, D3, &d0d1, &d3d2);
   }
   {    // 1rst pass
     const int32x4_t kCst937 = vdupq_n_s32(937);
@@ -318,7 +323,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref,
     const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352);
     const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9);
     const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
-    Transpose4x4_S16(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
+    Transpose4x4_S16_NEON(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
   }
   {    // 2nd pass
     // the (1<<16) addition is for the replacement: a3!=0  <-> 1-(a3==0)
@@ -358,8 +363,8 @@ static const int32_t kCoeff32[] = {
   51000, 51000, 51000, 51000
 };
 
-static void FTransform(const uint8_t* src, const uint8_t* ref,
-                       int16_t* out) {
+static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
+                            int16_t* out) {
   const int kBPS = BPS;
   const uint8_t* src_ptr = src;
   const uint8_t* ref_ptr = ref;
@@ -478,7 +483,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref,
   src += stride;                                    \
 } while (0)
 
-static void FTransformWHT(const int16_t* src, int16_t* out) {
+static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
   const int stride = 16;
   const int16x4_t zero = vdup_n_s16(0);
   int32x4x4_t tmp0;
@@ -516,7 +521,7 @@ static void FTransformWHT(const int16_t* src, int16_t* out) {
     tmp0.val[3] = vsubq_s32(a0, a1);
   }
   {
-    const int32x4x4_t tmp1 = Transpose4x4(tmp0);
+    const int32x4x4_t tmp1 = Transpose4x4_NEON(tmp0);
     // a0 = tmp[0 + i] + tmp[ 8 + i]
     // a1 = tmp[4 + i] + tmp[12 + i]
     // a2 = tmp[4 + i] - tmp[12 + i]
@@ -560,7 +565,7 @@ static void FTransformWHT(const int16_t* src, int16_t* out) {
 // a 26ae, b 26ae
 // a 37bf, b 37bf
 //
-static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
+static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) {
   const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]);
   const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]);
   const int32x4x2_t q2_tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[0]),
@@ -574,7 +579,8 @@ static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
   return q4_in;
 }
 
-static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const int16x8x4_t q4_in) {
+static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
+    const int16x8x4_t q4_in) {
   // {a0, a1} = {in[0] + in[2], in[1] + in[3]}
   // {a3, a2} = {in[0] - in[2], in[1] - in[3]}
   const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
@@ -593,7 +599,7 @@ static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const int16x8x4_t q4_in) {
   return q4_out;
 }
 
-static WEBP_INLINE int16x8x4_t DistoVerticalPass(const uint8x8x4_t q4_in) {
+static WEBP_INLINE int16x8x4_t DistoVerticalPass_NEON(const uint8x8x4_t q4_in) {
   const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0],
                                                         q4_in.val[2]));
   const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1],
@@ -610,7 +616,7 @@ static WEBP_INLINE int16x8x4_t DistoVerticalPass(const uint8x8x4_t q4_in) {
   return q4_out;
 }
 
-static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
+static WEBP_INLINE int16x4x4_t DistoLoadW_NEON(const uint16_t* w) {
   const uint16x8_t q_w07 = vld1q_u16(&w[0]);
   const uint16x8_t q_w8f = vld1q_u16(&w[8]);
   int16x4x4_t d4_w;
@@ -622,8 +628,8 @@ static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
   return d4_w;
 }
 
-static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
-                                      const int16x4x4_t d4_w) {
+static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
+                                           const int16x4x4_t d4_w) {
   int32x2_t d_sum;
   // sum += w[ 0] * abs(b0);
   // sum += w[ 4] * abs(b1);
@@ -652,8 +658,8 @@ static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
 // Hadamard transform
 // Returns the weighted sum of the absolute value of transformed coefficients.
 // w[] contains a row-major 4 by 4 symmetric matrix.
-static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
-                    const uint16_t* const w) {
+static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
+                         const uint16_t* const w) {
   uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
   uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
   uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
@@ -679,12 +685,12 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
     // Vertical pass first to avoid a transpose (vertical and horizontal passes
     // are commutative because w/kWeightY is symmetric) and subsequent
     // transpose.
-    const int16x8x4_t q4_v = DistoVerticalPass(d4_in);
-    const int16x4x4_t d4_w = DistoLoadW(w);
+    const int16x8x4_t q4_v = DistoVerticalPass_NEON(d4_in);
+    const int16x4x4_t d4_w = DistoLoadW_NEON(w);
     // horizontal pass
-    const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_v);
-    const int16x8x4_t q4_h = DistoHorizontalPass(q4_t);
-    int32x2_t d_sum = DistoSum(q4_h, d4_w);
+    const int16x8x4_t q4_t = DistoTranspose4x4S16_NEON(q4_v);
+    const int16x8x4_t q4_h = DistoHorizontalPass_NEON(q4_t);
+    int32x2_t d_sum = DistoSum_NEON(q4_h, d4_w);
 
     // abs(sum2 - sum1) >> 5
     d_sum = vabs_s32(d_sum);
@@ -694,13 +700,13 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
 }
 #undef LOAD_LANE_32b
 
-static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
-                      const uint16_t* const w) {
+static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
+                           const uint16_t* const w) {
   int D = 0;
   int x, y;
   for (y = 0; y < 16 * BPS; y += 4 * BPS) {
     for (x = 0; x < 16; x += 4) {
-      D += Disto4x4(a + x + y, b + x + y, w);
+      D += Disto4x4_NEON(a + x + y, b + x + y, w);
     }
   }
   return D;
@@ -708,15 +714,15 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
 
 //------------------------------------------------------------------------------
 
-static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
-                             int start_block, int end_block,
-                             VP8Histogram* const histo) {
+static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
+                                  int start_block, int end_block,
+                                  VP8Histogram* const histo) {
   const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
   int j;
   int distribution[MAX_COEFF_THRESH + 1] = { 0 };
   for (j = start_block; j < end_block; ++j) {
     int16_t out[16];
-    FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+    FTransform_NEON(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
     {
       int k;
       const int16x8_t a0 = vld1q_s16(out + 0);
@@ -740,9 +746,9 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
 
 //------------------------------------------------------------------------------
 
-static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
-                                        const uint8_t* const b,
-                                        uint32x4_t* const sum) {
+static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
+                                             const uint8_t* const b,
+                                             uint32x4_t* const sum) {
   const uint8x16_t a0 = vld1q_u8(a);
   const uint8x16_t b0 = vld1q_u8(b);
   const uint8x16_t abs_diff = vabdq_u8(a0, b0);
@@ -757,7 +763,7 @@ static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
 }
 
 // Horizontal sum of all four uint32_t values in 'sum'.
-static int SumToInt(uint32x4_t sum) {
+static int SumToInt_NEON(uint32x4_t sum) {
   const uint64x2_t sum2 = vpaddlq_u32(sum);
   const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
   return (int)sum3;
@@ -767,18 +773,18 @@ static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
   uint32x4_t sum = vdupq_n_u32(0);
   int y;
   for (y = 0; y < 16; ++y) {
-    AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
+    AccumulateSSE16_NEON(a + y * BPS, b + y * BPS, &sum);
   }
-  return SumToInt(sum);
+  return SumToInt_NEON(sum);
 }
 
 static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
   uint32x4_t sum = vdupq_n_u32(0);
   int y;
   for (y = 0; y < 8; ++y) {
-    AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
+    AccumulateSSE16_NEON(a + y * BPS, b + y * BPS, &sum);
   }
-  return SumToInt(sum);
+  return SumToInt_NEON(sum);
 }
 
 static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
@@ -791,12 +797,12 @@ static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
     const uint16x8_t prod = vmull_u8(abs_diff, abs_diff);
     sum = vpadalq_u16(sum, prod);
   }
-  return SumToInt(sum);
+  return SumToInt_NEON(sum);
 }
 
 static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
-  const uint8x16_t a0 = Load4x4(a);
-  const uint8x16_t b0 = Load4x4(b);
+  const uint8x16_t a0 = Load4x4_NEON(a);
+  const uint8x16_t b0 = Load4x4_NEON(b);
   const uint8x16_t abs_diff = vabdq_u8(a0, b0);
   const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
                                     vget_low_u8(abs_diff));
@@ -805,7 +811,7 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
   /* pair-wise adds and widen */
   const uint32x4_t sum1 = vpaddlq_u16(prod1);
   const uint32x4_t sum2 = vpaddlq_u16(prod2);
-  return SumToInt(vaddq_u32(sum1, sum2));
+  return SumToInt_NEON(vaddq_u32(sum1, sum2));
 }
 
 //------------------------------------------------------------------------------
@@ -813,8 +819,8 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
 // Compilation with gcc-4.6.x is problematic for now.
 #if !defined(WORK_AROUND_GCC)
 
-static int16x8_t Quantize(int16_t* const in,
-                          const VP8Matrix* const mtx, int offset) {
+static int16x8_t Quantize_NEON(int16_t* const in,
+                               const VP8Matrix* const mtx, int offset) {
   const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
   const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
   const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
@@ -847,10 +853,10 @@ static const uint8_t kShuffles[4][8] = {
   { 14, 15, 22, 23, 28, 29, 30, 31 }
 };
 
-static int QuantizeBlock(int16_t in[16], int16_t out[16],
-                         const VP8Matrix* const mtx) {
-  const int16x8_t out0 = Quantize(in, mtx, 0);
-  const int16x8_t out1 = Quantize(in, mtx, 8);
+static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
+                              const VP8Matrix* const mtx) {
+  const int16x8_t out0 = Quantize_NEON(in, mtx, 0);
+  const int16x8_t out1 = Quantize_NEON(in, mtx, 8);
   uint8x8x4_t shuffles;
   // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
   // non-standard versions there.
@@ -889,11 +895,11 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
   return 0;
 }
 
-static int Quantize2Blocks(int16_t in[32], int16_t out[32],
-                           const VP8Matrix* const mtx) {
+static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
+                                const VP8Matrix* const mtx) {
   int nz;
-  nz  = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
-  nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
+  nz  = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
   return nz;
 }
 
@@ -905,14 +911,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
 extern void VP8EncDspInitNEON(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
-  VP8ITransform = ITransform;
-  VP8FTransform = FTransform;
+  VP8ITransform = ITransform_NEON;
+  VP8FTransform = FTransform_NEON;
 
-  VP8FTransformWHT = FTransformWHT;
+  VP8FTransformWHT = FTransformWHT_NEON;
 
-  VP8TDisto4x4 = Disto4x4;
-  VP8TDisto16x16 = Disto16x16;
-  VP8CollectHistogram = CollectHistogram;
+  VP8TDisto4x4 = Disto4x4_NEON;
+  VP8TDisto16x16 = Disto16x16_NEON;
+  VP8CollectHistogram = CollectHistogram_NEON;
 
   VP8SSE16x16 = SSE16x16_NEON;
   VP8SSE16x8 = SSE16x8_NEON;
@@ -920,8 +926,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
   VP8SSE4x4 = SSE4x4_NEON;
 
 #if !defined(WORK_AROUND_GCC)
-  VP8EncQuantizeBlock = QuantizeBlock;
-  VP8EncQuantize2Blocks = Quantize2Blocks;
+  VP8EncQuantizeBlock = QuantizeBlock_NEON;
+  VP8EncQuantize2Blocks = Quantize2Blocks_NEON;
 #endif
 }
 
diff --git a/thirdparty/libwebp/dsp/enc_sse2.c b/thirdparty/libwebp/src/dsp/enc_sse2.c
index 2026a74c91..7b3f142c31 100644
--- a/thirdparty/libwebp/dsp/enc_sse2.c
+++ b/thirdparty/libwebp/src/dsp/enc_sse2.c
@@ -11,23 +11,23 @@
 //
 // Author: Christian Duvivier (cduvivier@google.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE2)
 #include <assert.h>
 #include <stdlib.h>  // for abs()
 #include <emmintrin.h>
 
-#include "./common_sse2.h"
-#include "../enc/cost_enc.h"
-#include "../enc/vp8i_enc.h"
+#include "src/dsp/common_sse2.h"
+#include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"
 
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
 
 // Does one or two inverse transforms.
-static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-                       int do_two) {
+static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                            int do_two) {
   // This implementation makes use of 16-bit fixed point versions of two
   // multiply constants:
   //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@@ -193,10 +193,10 @@ static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
   }
 }
 
-static void FTransformPass1(const __m128i* const in01,
-                            const __m128i* const in23,
-                            __m128i* const out01,
-                            __m128i* const out32) {
+static void FTransformPass1_SSE2(const __m128i* const in01,
+                                 const __m128i* const in23,
+                                 __m128i* const out01,
+                                 __m128i* const out32) {
   const __m128i k937 = _mm_set1_epi32(937);
   const __m128i k1812 = _mm_set1_epi32(1812);
 
@@ -239,8 +239,9 @@ static void FTransformPass1(const __m128i* const in01,
   *out32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));  // 3 2 3 2 3 2..
 }
 
-static void FTransformPass2(const __m128i* const v01, const __m128i* const v32,
-                            int16_t* out) {
+static void FTransformPass2_SSE2(const __m128i* const v01,
+                                 const __m128i* const v32,
+                                 int16_t* out) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i seven = _mm_set1_epi16(7);
   const __m128i k5352_2217 = _mm_set_epi16(5352,  2217, 5352,  2217,
@@ -291,7 +292,8 @@ static void FTransformPass2(const __m128i* const v01, const __m128i* const v32,
   _mm_storeu_si128((__m128i*)&out[8], d2_f3);
 }
 
-static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
+                            int16_t* out) {
   const __m128i zero = _mm_setzero_si128();
   // Load src.
   const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
@@ -328,13 +330,14 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
   __m128i v01, v32;
 
   // First pass
-  FTransformPass1(&row01, &row23, &v01, &v32);
+  FTransformPass1_SSE2(&row01, &row23, &v01, &v32);
 
   // Second pass
-  FTransformPass2(&v01, &v32, out);
+  FTransformPass2_SSE2(&v01, &v32, out);
 }
 
-static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
+                             int16_t* out) {
   const __m128i zero = _mm_setzero_si128();
 
   // Load src and convert to 16b.
@@ -374,15 +377,15 @@ static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
   __m128i v01h, v32h;
 
   // First pass
-  FTransformPass1(&shuf01l, &shuf23l, &v01l, &v32l);
-  FTransformPass1(&shuf01h, &shuf23h, &v01h, &v32h);
+  FTransformPass1_SSE2(&shuf01l, &shuf23l, &v01l, &v32l);
+  FTransformPass1_SSE2(&shuf01h, &shuf23h, &v01h, &v32h);
 
   // Second pass
-  FTransformPass2(&v01l, &v32l, out + 0);
-  FTransformPass2(&v01h, &v32h, out + 16);
+  FTransformPass2_SSE2(&v01l, &v32l, out + 0);
+  FTransformPass2_SSE2(&v01h, &v32h, out + 16);
 }
 
-static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
+static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
   const __m128i kMult = _mm_set_epi16(-1, 1, -1, 1, 1, 1, 1, 1);
   const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
   const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
@@ -398,14 +401,14 @@ static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
   *out = _mm_madd_epi16(D, kMult);
 }
 
-static void FTransformWHT(const int16_t* in, int16_t* out) {
+static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
   // Input is 12b signed.
   __m128i row0, row1, row2, row3;
   // Rows are 14b signed.
-  FTransformWHTRow(in + 0 * 64, &row0);
-  FTransformWHTRow(in + 1 * 64, &row1);
-  FTransformWHTRow(in + 2 * 64, &row2);
-  FTransformWHTRow(in + 3 * 64, &row3);
+  FTransformWHTRow_SSE2(in + 0 * 64, &row0);
+  FTransformWHTRow_SSE2(in + 1 * 64, &row1);
+  FTransformWHTRow_SSE2(in + 2 * 64, &row2);
+  FTransformWHTRow_SSE2(in + 3 * 64, &row3);
 
   {
     // The a* are 15b signed.
@@ -431,9 +434,9 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
 // Compute susceptibility based on DCT-coeff histograms:
 // the higher, the "easier" the macroblock is to compress.
 
-static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
-                             int start_block, int end_block,
-                             VP8Histogram* const histo) {
+static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
+                                  int start_block, int end_block,
+                                  VP8Histogram* const histo) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
   int j;
@@ -442,7 +445,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
     int16_t out[16];
     int k;
 
-    FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+    FTransform_SSE2(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
 
     // Convert coefficients to bin (within out[]).
     {
@@ -476,7 +479,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
 // Intra predictions
 
 // helper for chroma-DC predictions
-static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
+static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
   int j;
   const __m128i values = _mm_set1_epi8(v);
   for (j = 0; j < 8; ++j) {
@@ -484,7 +487,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
   }
 }
 
-static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
+static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
   int j;
   const __m128i values = _mm_set1_epi8(v);
   for (j = 0; j < 16; ++j) {
@@ -492,20 +495,20 @@ static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
   }
 }
 
-static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
+static WEBP_INLINE void Fill_SSE2(uint8_t* dst, int value, int size) {
   if (size == 4) {
     int j;
     for (j = 0; j < 4; ++j) {
       memset(dst + j * BPS, value, 4);
     }
   } else if (size == 8) {
-    Put8x8uv(value, dst);
+    Put8x8uv_SSE2(value, dst);
   } else {
-    Put16(value, dst);
+    Put16_SSE2(value, dst);
   }
 }
 
-static WEBP_INLINE void VE8uv(uint8_t* dst, const uint8_t* top) {
+static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
   int j;
   const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
   for (j = 0; j < 8; ++j) {
@@ -513,7 +516,7 @@ static WEBP_INLINE void VE8uv(uint8_t* dst, const uint8_t* top) {
   }
 }
 
-static WEBP_INLINE void VE16(uint8_t* dst, const uint8_t* top) {
+static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
   const __m128i top_values = _mm_load_si128((const __m128i*)top);
   int j;
   for (j = 0; j < 16; ++j) {
@@ -521,20 +524,20 @@ static WEBP_INLINE void VE16(uint8_t* dst, const uint8_t* top) {
   }
 }
 
-static WEBP_INLINE void VerticalPred(uint8_t* dst,
-                                     const uint8_t* top, int size) {
+static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
+                                          const uint8_t* top, int size) {
   if (top != NULL) {
     if (size == 8) {
-      VE8uv(dst, top);
+      VE8uv_SSE2(dst, top);
     } else {
-      VE16(dst, top);
+      VE16_SSE2(dst, top);
     }
   } else {
-    Fill(dst, 127, size);
+    Fill_SSE2(dst, 127, size);
   }
 }
 
-static WEBP_INLINE void HE8uv(uint8_t* dst, const uint8_t* left) {
+static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
   int j;
   for (j = 0; j < 8; ++j) {
     const __m128i values = _mm_set1_epi8(left[j]);
@@ -543,7 +546,7 @@ static WEBP_INLINE void HE8uv(uint8_t* dst, const uint8_t* left) {
   }
 }
 
-static WEBP_INLINE void HE16(uint8_t* dst, const uint8_t* left) {
+static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
   int j;
   for (j = 0; j < 16; ++j) {
     const __m128i values = _mm_set1_epi8(left[j]);
@@ -552,21 +555,21 @@ static WEBP_INLINE void HE16(uint8_t* dst, const uint8_t* left) {
   }
 }
 
-static WEBP_INLINE void HorizontalPred(uint8_t* dst,
-                                       const uint8_t* left, int size) {
+static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
+                                            const uint8_t* left, int size) {
   if (left != NULL) {
     if (size == 8) {
-      HE8uv(dst, left);
+      HE8uv_SSE2(dst, left);
     } else {
-      HE16(dst, left);
+      HE16_SSE2(dst, left);
     }
   } else {
-    Fill(dst, 129, size);
+    Fill_SSE2(dst, 129, size);
   }
 }
 
-static WEBP_INLINE void TM(uint8_t* dst, const uint8_t* left,
-                           const uint8_t* top, int size) {
+static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
+                                const uint8_t* top, int size) {
   const __m128i zero = _mm_setzero_si128();
   int y;
   if (size == 8) {
@@ -593,13 +596,13 @@ static WEBP_INLINE void TM(uint8_t* dst, const uint8_t* left,
   }
 }
 
-static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
-                                   const uint8_t* top, int size) {
+static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
+                                        const uint8_t* top, int size) {
   if (left != NULL) {
     if (top != NULL) {
-      TM(dst, left, top, size);
+      TM_SSE2(dst, left, top, size);
     } else {
-      HorizontalPred(dst, left, size);
+      HorizontalPred_SSE2(dst, left, size);
     }
   } else {
     // true motion without left samples (hence: with default 129 value)
@@ -607,90 +610,90 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
     // Note that if top samples are not available, the default value is
     // then 129, and not 127 as in the VerticalPred case.
     if (top != NULL) {
-      VerticalPred(dst, top, size);
+      VerticalPred_SSE2(dst, top, size);
     } else {
-      Fill(dst, 129, size);
+      Fill_SSE2(dst, 129, size);
     }
   }
 }
 
-static WEBP_INLINE void DC8uv(uint8_t* dst, const uint8_t* left,
-                              const uint8_t* top) {
+static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
+                                   const uint8_t* top) {
   const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
   const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
   const __m128i combined = _mm_unpacklo_epi64(top_values, left_values);
   const int DC = VP8HorizontalAdd8b(&combined) + 8;
-  Put8x8uv(DC >> 4, dst);
+  Put8x8uv_SSE2(DC >> 4, dst);
 }
 
-static WEBP_INLINE void DC8uvNoLeft(uint8_t* dst, const uint8_t* top) {
+static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
   const __m128i sum = _mm_sad_epu8(top_values, zero);
   const int DC = _mm_cvtsi128_si32(sum) + 4;
-  Put8x8uv(DC >> 3, dst);
+  Put8x8uv_SSE2(DC >> 3, dst);
 }
 
-static WEBP_INLINE void DC8uvNoTop(uint8_t* dst, const uint8_t* left) {
+static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* dst, const uint8_t* left) {
   // 'left' is contiguous so we can reuse the top summation.
-  DC8uvNoLeft(dst, left);
+  DC8uvNoLeft_SSE2(dst, left);
 }
 
-static WEBP_INLINE void DC8uvNoTopLeft(uint8_t* dst) {
-  Put8x8uv(0x80, dst);
+static WEBP_INLINE void DC8uvNoTopLeft_SSE2(uint8_t* dst) {
+  Put8x8uv_SSE2(0x80, dst);
 }
 
-static WEBP_INLINE void DC8uvMode(uint8_t* dst, const uint8_t* left,
-                                  const uint8_t* top) {
+static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
+                                       const uint8_t* top) {
   if (top != NULL) {
     if (left != NULL) {  // top and left present
-      DC8uv(dst, left, top);
+      DC8uv_SSE2(dst, left, top);
     } else {  // top, but no left
-      DC8uvNoLeft(dst, top);
+      DC8uvNoLeft_SSE2(dst, top);
     }
   } else if (left != NULL) {  // left but no top
-    DC8uvNoTop(dst, left);
+    DC8uvNoTop_SSE2(dst, left);
   } else {  // no top, no left, nothing.
-    DC8uvNoTopLeft(dst);
+    DC8uvNoTopLeft_SSE2(dst);
   }
 }
 
-static WEBP_INLINE void DC16(uint8_t* dst, const uint8_t* left,
-                             const uint8_t* top) {
+static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
+                                  const uint8_t* top) {
   const __m128i top_row = _mm_load_si128((const __m128i*)top);
   const __m128i left_row = _mm_load_si128((const __m128i*)left);
   const int DC =
       VP8HorizontalAdd8b(&top_row) + VP8HorizontalAdd8b(&left_row) + 16;
-  Put16(DC >> 5, dst);
+  Put16_SSE2(DC >> 5, dst);
 }
 
-static WEBP_INLINE void DC16NoLeft(uint8_t* dst, const uint8_t* top) {
+static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
   const __m128i top_row = _mm_load_si128((const __m128i*)top);
   const int DC = VP8HorizontalAdd8b(&top_row) + 8;
-  Put16(DC >> 4, dst);
+  Put16_SSE2(DC >> 4, dst);
 }
 
-static WEBP_INLINE void DC16NoTop(uint8_t* dst, const uint8_t* left) {
+static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* dst, const uint8_t* left) {
   // 'left' is contiguous so we can reuse the top summation.
-  DC16NoLeft(dst, left);
+  DC16NoLeft_SSE2(dst, left);
 }
 
-static WEBP_INLINE void DC16NoTopLeft(uint8_t* dst) {
-  Put16(0x80, dst);
+static WEBP_INLINE void DC16NoTopLeft_SSE2(uint8_t* dst) {
+  Put16_SSE2(0x80, dst);
 }
 
-static WEBP_INLINE void DC16Mode(uint8_t* dst, const uint8_t* left,
-                                 const uint8_t* top) {
+static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
+                                      const uint8_t* top) {
   if (top != NULL) {
     if (left != NULL) {  // top and left present
-      DC16(dst, left, top);
+      DC16_SSE2(dst, left, top);
     } else {  // top, but no left
-      DC16NoLeft(dst, top);
+      DC16NoLeft_SSE2(dst, top);
     }
   } else if (left != NULL) {  // left but no top
-    DC16NoTop(dst, left);
+    DC16NoTop_SSE2(dst, left);
   } else {  // no top, no left, nothing.
-    DC16NoTopLeft(dst);
+    DC16NoTopLeft_SSE2(dst);
   }
 }
 
@@ -709,7 +712,8 @@ static WEBP_INLINE void DC16Mode(uint8_t* dst, const uint8_t* left,
 //   where: AC = (a + b + 1) >> 1,   BC = (b + c + 1) >> 1
 //   and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
 
-static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) {  // vertical
+static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
+                                 const uint8_t* top) {  // vertical
   const __m128i one = _mm_set1_epi8(1);
   const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(top - 1));
   const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -725,7 +729,8 @@ static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) {  // vertical
   }
 }
 
-static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) {  // horizontal
+static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
+                                 const uint8_t* top) {  // horizontal
   const int X = top[-1];
   const int I = top[-2];
   const int J = top[-3];
@@ -737,14 +742,15 @@ static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) {  // horizontal
   WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
 }
 
-static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
+static WEBP_INLINE void DC4_SSE2(uint8_t* dst, const uint8_t* top) {
   uint32_t dc = 4;
   int i;
   for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
-  Fill(dst, dc >> 3, 4);
+  Fill_SSE2(dst, dc >> 3, 4);
 }
 
-static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {  // Down-Left
+static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
+                                 const uint8_t* top) {  // Down-Left
   const __m128i one = _mm_set1_epi8(1);
   const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
   const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -760,8 +766,8 @@ static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {  // Down-Left
   WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
 }
 
-static WEBP_INLINE void VR4(uint8_t* dst,
-                            const uint8_t* top) {  // Vertical-Right
+static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
+                                 const uint8_t* top) {  // Vertical-Right
   const __m128i one = _mm_set1_epi8(1);
   const int I = top[-2];
   const int J = top[-3];
@@ -786,8 +792,8 @@ static WEBP_INLINE void VR4(uint8_t* dst,
   DST(0, 3) = AVG3(K, J, I);
 }
 
-static WEBP_INLINE void VL4(uint8_t* dst,
-                            const uint8_t* top) {  // Vertical-Left
+static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
+                                 const uint8_t* top) {  // Vertical-Left
   const __m128i one = _mm_set1_epi8(1);
   const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
   const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
@@ -812,7 +818,8 @@ static WEBP_INLINE void VL4(uint8_t* dst,
   DST(3, 3) = (extra_out >> 8) & 0xff;
 }
 
-static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {  // Down-right
+static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
+                                 const uint8_t* top) {  // Down-right
   const __m128i one = _mm_set1_epi8(1);
   const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
   const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
@@ -828,7 +835,7 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {  // Down-right
   WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
 }
 
-static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
+static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
   const int I = top[-2];
   const int J = top[-3];
   const int K = top[-4];
@@ -843,7 +850,7 @@ static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
   DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }
 
-static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
+static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
   const int X = top[-1];
   const int I = top[-2];
   const int J = top[-3];
@@ -866,7 +873,7 @@ static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
   DST(1, 3)             = AVG3(L, K, J);
 }
 
-static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
+static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
   const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
@@ -888,55 +895,56 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
 
 // Left samples are top[-5 .. -2], top_left is top[-1], top are
 // located at top[0..3], and top right is top[4..7]
-static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
-  DC4(I4DC4 + dst, top);
-  TM4(I4TM4 + dst, top);
-  VE4(I4VE4 + dst, top);
-  HE4(I4HE4 + dst, top);
-  RD4(I4RD4 + dst, top);
-  VR4(I4VR4 + dst, top);
-  LD4(I4LD4 + dst, top);
-  VL4(I4VL4 + dst, top);
-  HD4(I4HD4 + dst, top);
-  HU4(I4HU4 + dst, top);
+static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
+  DC4_SSE2(I4DC4 + dst, top);
+  TM4_SSE2(I4TM4 + dst, top);
+  VE4_SSE2(I4VE4 + dst, top);
+  HE4_SSE2(I4HE4 + dst, top);
+  RD4_SSE2(I4RD4 + dst, top);
+  VR4_SSE2(I4VR4 + dst, top);
+  LD4_SSE2(I4LD4 + dst, top);
+  VL4_SSE2(I4VL4 + dst, top);
+  HD4_SSE2(I4HD4 + dst, top);
+  HU4_SSE2(I4HU4 + dst, top);
 }
 
 //------------------------------------------------------------------------------
 // Chroma 8x8 prediction (paragraph 12.2)
 
-static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
-                             const uint8_t* top) {
+static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
+                                  const uint8_t* top) {
   // U block
-  DC8uvMode(C8DC8 + dst, left, top);
-  VerticalPred(C8VE8 + dst, top, 8);
-  HorizontalPred(C8HE8 + dst, left, 8);
-  TrueMotion(C8TM8 + dst, left, top, 8);
+  DC8uvMode_SSE2(C8DC8 + dst, left, top);
+  VerticalPred_SSE2(C8VE8 + dst, top, 8);
+  HorizontalPred_SSE2(C8HE8 + dst, left, 8);
+  TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
   // V block
   dst += 8;
   if (top != NULL) top += 8;
   if (left != NULL) left += 16;
-  DC8uvMode(C8DC8 + dst, left, top);
-  VerticalPred(C8VE8 + dst, top, 8);
-  HorizontalPred(C8HE8 + dst, left, 8);
-  TrueMotion(C8TM8 + dst, left, top, 8);
+  DC8uvMode_SSE2(C8DC8 + dst, left, top);
+  VerticalPred_SSE2(C8VE8 + dst, top, 8);
+  HorizontalPred_SSE2(C8HE8 + dst, left, 8);
+  TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
 }
 
 //------------------------------------------------------------------------------
 // luma 16x16 prediction (paragraph 12.3)
 
-static void Intra16Preds(uint8_t* dst,
-                         const uint8_t* left, const uint8_t* top) {
-  DC16Mode(I16DC16 + dst, left, top);
-  VerticalPred(I16VE16 + dst, top, 16);
-  HorizontalPred(I16HE16 + dst, left, 16);
-  TrueMotion(I16TM16 + dst, left, top, 16);
+static void Intra16Preds_SSE2(uint8_t* dst,
+                              const uint8_t* left, const uint8_t* top) {
+  DC16Mode_SSE2(I16DC16 + dst, left, top);
+  VerticalPred_SSE2(I16VE16 + dst, top, 16);
+  HorizontalPred_SSE2(I16HE16 + dst, left, 16);
+  TrueMotion_SSE2(I16TM16 + dst, left, top, 16);
 }
 
 //------------------------------------------------------------------------------
 // Metric
 
-static WEBP_INLINE void SubtractAndAccumulate(const __m128i a, const __m128i b,
-                                              __m128i* const sum) {
+static WEBP_INLINE void SubtractAndAccumulate_SSE2(const __m128i a,
+                                                   const __m128i b,
+                                                   __m128i* const sum) {
   // take abs(a-b) in 8b
   const __m128i a_b = _mm_subs_epu8(a, b);
   const __m128i b_a = _mm_subs_epu8(b, a);
@@ -951,8 +959,8 @@ static WEBP_INLINE void SubtractAndAccumulate(const __m128i a, const __m128i b,
   *sum = _mm_add_epi32(sum1, sum2);
 }
 
-static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
-                                int num_pairs) {
+static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
+                                     int num_pairs) {
   __m128i sum = _mm_setzero_si128();
   int32_t tmp[4];
   int i;
@@ -963,8 +971,8 @@ static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
     const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[BPS * 1]);
     const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[BPS * 1]);
     __m128i sum1, sum2;
-    SubtractAndAccumulate(a0, b0, &sum1);
-    SubtractAndAccumulate(a1, b1, &sum2);
+    SubtractAndAccumulate_SSE2(a0, b0, &sum1);
+    SubtractAndAccumulate_SSE2(a1, b1, &sum2);
     sum = _mm_add_epi32(sum, _mm_add_epi32(sum1, sum2));
     a += 2 * BPS;
     b += 2 * BPS;
@@ -973,18 +981,18 @@ static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
   return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
 }
 
-static int SSE16x16(const uint8_t* a, const uint8_t* b) {
-  return SSE_16xN(a, b, 8);
+static int SSE16x16_SSE2(const uint8_t* a, const uint8_t* b) {
+  return SSE_16xN_SSE2(a, b, 8);
 }
 
-static int SSE16x8(const uint8_t* a, const uint8_t* b) {
-  return SSE_16xN(a, b, 4);
+static int SSE16x8_SSE2(const uint8_t* a, const uint8_t* b) {
+  return SSE_16xN_SSE2(a, b, 4);
 }
 
 #define LOAD_8x16b(ptr) \
   _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
 
-static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
   const __m128i zero = _mm_setzero_si128();
   int num_pairs = 4;
   __m128i sum = zero;
@@ -1011,7 +1019,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
 }
 #undef LOAD_8x16b
 
-static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
   const __m128i zero = _mm_setzero_si128();
 
   // Load values. Note that we read 8 pixels instead of 4,
@@ -1048,7 +1056,7 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
 
 //------------------------------------------------------------------------------
 
-static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
+static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
   const __m128i mask = _mm_set1_epi16(0x00ff);
   const __m128i a0 = _mm_loadu_si128((const __m128i*)&ref[BPS * 0]);
   const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
@@ -1086,8 +1094,8 @@ static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
 // Hadamard transform
 // Returns the weighted sum of the absolute value of transformed coefficients.
 // w[] contains a row-major 4 by 4 symmetric matrix.
-static int TTransform(const uint8_t* inA, const uint8_t* inB,
-                      const uint16_t* const w) {
+static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
+                           const uint16_t* const w) {
   int32_t sum[4];
   __m128i tmp_0, tmp_1, tmp_2, tmp_3;
   const __m128i zero = _mm_setzero_si128();
@@ -1187,19 +1195,19 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
   return sum[0] + sum[1] + sum[2] + sum[3];
 }
 
-static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
-                    const uint16_t* const w) {
-  const int diff_sum = TTransform(a, b, w);
+static int Disto4x4_SSE2(const uint8_t* const a, const uint8_t* const b,
+                         const uint16_t* const w) {
+  const int diff_sum = TTransform_SSE2(a, b, w);
   return abs(diff_sum) >> 5;
 }
 
-static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
-                      const uint16_t* const w) {
+static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
+                           const uint16_t* const w) {
   int D = 0;
   int x, y;
   for (y = 0; y < 16 * BPS; y += 4 * BPS) {
     for (x = 0; x < 16; x += 4) {
-      D += Disto4x4(a + x + y, b + x + y, w);
+      D += Disto4x4_SSE2(a + x + y, b + x + y, w);
     }
   }
   return D;
@@ -1209,9 +1217,9 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
 // Quantization
 //
 
-static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
-                                       const uint16_t* const sharpen,
-                                       const VP8Matrix* const mtx) {
+static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
+                                            const uint16_t* const sharpen,
+                                            const VP8Matrix* const mtx) {
   const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
   const __m128i zero = _mm_setzero_si128();
   __m128i coeff0, coeff8;
@@ -1321,22 +1329,22 @@ static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
   return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
 }
 
-static int QuantizeBlock(int16_t in[16], int16_t out[16],
-                         const VP8Matrix* const mtx) {
-  return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
+static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
+                              const VP8Matrix* const mtx) {
+  return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
 }
 
-static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
-                            const VP8Matrix* const mtx) {
-  return DoQuantizeBlock(in, out, NULL, mtx);
+static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
+                                 const VP8Matrix* const mtx) {
+  return DoQuantizeBlock_SSE2(in, out, NULL, mtx);
 }
 
-static int Quantize2Blocks(int16_t in[32], int16_t out[32],
-                           const VP8Matrix* const mtx) {
+static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
+                                const VP8Matrix* const mtx) {
   int nz;
   const uint16_t* const sharpen = &mtx->sharpen_[0];
-  nz  = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
-  nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
+  nz  = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
+  nz |= DoQuantizeBlock_SSE2(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
   return nz;
 }
 
@@ -1346,139 +1354,28 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
 extern void VP8EncDspInitSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) {
-  VP8CollectHistogram = CollectHistogram;
-  VP8EncPredLuma16 = Intra16Preds;
-  VP8EncPredChroma8 = IntraChromaPreds;
-  VP8EncPredLuma4 = Intra4Preds;
-  VP8EncQuantizeBlock = QuantizeBlock;
-  VP8EncQuantize2Blocks = Quantize2Blocks;
-  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
-  VP8ITransform = ITransform;
-  VP8FTransform = FTransform;
-  VP8FTransform2 = FTransform2;
-  VP8FTransformWHT = FTransformWHT;
-  VP8SSE16x16 = SSE16x16;
-  VP8SSE16x8 = SSE16x8;
-  VP8SSE8x8 = SSE8x8;
-  VP8SSE4x4 = SSE4x4;
-  VP8TDisto4x4 = Disto4x4;
-  VP8TDisto16x16 = Disto16x16;
-  VP8Mean16x4 = Mean16x4;
-}
-
-//------------------------------------------------------------------------------
-// SSIM / PSNR entry point (TODO(skal): move to its own file later)
-
-static uint32_t AccumulateSSE_SSE2(const uint8_t* src1,
-                                   const uint8_t* src2, int len) {
-  int i = 0;
-  uint32_t sse2 = 0;
-  if (len >= 16) {
-    const int limit = len - 32;
-    int32_t tmp[4];
-    __m128i sum1;
-    __m128i sum = _mm_setzero_si128();
-    __m128i a0 = _mm_loadu_si128((const __m128i*)&src1[i]);
-    __m128i b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
-    i += 16;
-    while (i <= limit) {
-      const __m128i a1 = _mm_loadu_si128((const __m128i*)&src1[i]);
-      const __m128i b1 = _mm_loadu_si128((const __m128i*)&src2[i]);
-      __m128i sum2;
-      i += 16;
-      SubtractAndAccumulate(a0, b0, &sum1);
-      sum = _mm_add_epi32(sum, sum1);
-      a0 = _mm_loadu_si128((const __m128i*)&src1[i]);
-      b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
-      i += 16;
-      SubtractAndAccumulate(a1, b1, &sum2);
-      sum = _mm_add_epi32(sum, sum2);
-    }
-    SubtractAndAccumulate(a0, b0, &sum1);
-    sum = _mm_add_epi32(sum, sum1);
-    _mm_storeu_si128((__m128i*)tmp, sum);
-    sse2 += (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
-  }
-
-  for (; i < len; ++i) {
-    const int32_t diff = src1[i] - src2[i];
-    sse2 += diff * diff;
-  }
-  return sse2;
-}
-
-static uint32_t HorizontalAdd16b(const __m128i* const m) {
-  uint16_t tmp[8];
-  const __m128i a = _mm_srli_si128(*m, 8);
-  const __m128i b = _mm_add_epi16(*m, a);
-  _mm_storeu_si128((__m128i*)tmp, b);
-  return (uint32_t)tmp[3] + tmp[2] + tmp[1] + tmp[0];
-}
-
-static uint32_t HorizontalAdd32b(const __m128i* const m) {
-  const __m128i a = _mm_srli_si128(*m, 8);
-  const __m128i b = _mm_add_epi32(*m, a);
-  const __m128i c = _mm_add_epi32(b, _mm_srli_si128(b, 4));
-  return (uint32_t)_mm_cvtsi128_si32(c);
-}
-
-static const uint16_t kWeight[] = { 1, 2, 3, 4, 3, 2, 1, 0 };
-
-#define ACCUMULATE_ROW(WEIGHT) do {                         \
-  /* compute row weight (Wx * Wy) */                        \
-  const __m128i Wy = _mm_set1_epi16((WEIGHT));              \
-  const __m128i W = _mm_mullo_epi16(Wx, Wy);                \
-  /* process 8 bytes at a time (7 bytes, actually) */       \
-  const __m128i a0 = _mm_loadl_epi64((const __m128i*)src1); \
-  const __m128i b0 = _mm_loadl_epi64((const __m128i*)src2); \
-  /* convert to 16b and multiply by weight */               \
-  const __m128i a1 = _mm_unpacklo_epi8(a0, zero);           \
-  const __m128i b1 = _mm_unpacklo_epi8(b0, zero);           \
-  const __m128i wa1 = _mm_mullo_epi16(a1, W);               \
-  const __m128i wb1 = _mm_mullo_epi16(b1, W);               \
-  /* accumulate */                                          \
-  xm  = _mm_add_epi16(xm, wa1);                             \
-  ym  = _mm_add_epi16(ym, wb1);                             \
-  xxm = _mm_add_epi32(xxm, _mm_madd_epi16(a1, wa1));        \
-  xym = _mm_add_epi32(xym, _mm_madd_epi16(a1, wb1));        \
-  yym = _mm_add_epi32(yym, _mm_madd_epi16(b1, wb1));        \
-  src1 += stride1;                                          \
-  src2 += stride2;                                          \
-} while (0)
-
-static double SSIMGet_SSE2(const uint8_t* src1, int stride1,
-                           const uint8_t* src2, int stride2) {
-  VP8DistoStats stats;
-  const __m128i zero = _mm_setzero_si128();
-  __m128i xm = zero, ym = zero;                // 16b accums
-  __m128i xxm = zero, yym = zero, xym = zero;  // 32b accum
-  const __m128i Wx = _mm_loadu_si128((const __m128i*)kWeight);
-  assert(2 * VP8_SSIM_KERNEL + 1 == 7);
-  ACCUMULATE_ROW(1);
-  ACCUMULATE_ROW(2);
-  ACCUMULATE_ROW(3);
-  ACCUMULATE_ROW(4);
-  ACCUMULATE_ROW(3);
-  ACCUMULATE_ROW(2);
-  ACCUMULATE_ROW(1);
-  stats.xm  = HorizontalAdd16b(&xm);
-  stats.ym  = HorizontalAdd16b(&ym);
-  stats.xxm = HorizontalAdd32b(&xxm);
-  stats.xym = HorizontalAdd32b(&xym);
-  stats.yym = HorizontalAdd32b(&yym);
-  return VP8SSIMFromStats(&stats);
-}
-
-extern void VP8SSIMDspInitSSE2(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInitSSE2(void) {
-  VP8AccumulateSSE = AccumulateSSE_SSE2;
-  VP8SSIMGet = SSIMGet_SSE2;
+  VP8CollectHistogram = CollectHistogram_SSE2;
+  VP8EncPredLuma16 = Intra16Preds_SSE2;
+  VP8EncPredChroma8 = IntraChromaPreds_SSE2;
+  VP8EncPredLuma4 = Intra4Preds_SSE2;
+  VP8EncQuantizeBlock = QuantizeBlock_SSE2;
+  VP8EncQuantize2Blocks = Quantize2Blocks_SSE2;
+  VP8EncQuantizeBlockWHT = QuantizeBlockWHT_SSE2;
+  VP8ITransform = ITransform_SSE2;
+  VP8FTransform = FTransform_SSE2;
+  VP8FTransform2 = FTransform2_SSE2;
+  VP8FTransformWHT = FTransformWHT_SSE2;
+  VP8SSE16x16 = SSE16x16_SSE2;
+  VP8SSE16x8 = SSE16x8_SSE2;
+  VP8SSE8x8 = SSE8x8_SSE2;
+  VP8SSE4x4 = SSE4x4_SSE2;
+  VP8TDisto4x4 = Disto4x4_SSE2;
+  VP8TDisto16x16 = Disto16x16_SSE2;
+  VP8Mean16x4 = Mean16x4_SSE2;
 }
 
 #else  // !WEBP_USE_SSE2
 
 WEBP_DSP_INIT_STUB(VP8EncDspInitSSE2)
-WEBP_DSP_INIT_STUB(VP8SSIMDspInitSSE2)
 
 #endif  // WEBP_USE_SSE2
diff --git a/thirdparty/libwebp/dsp/enc_sse41.c b/thirdparty/libwebp/src/dsp/enc_sse41.c
index e32086d9fd..924035a644 100644
--- a/thirdparty/libwebp/dsp/enc_sse41.c
+++ b/thirdparty/libwebp/src/dsp/enc_sse41.c
@@ -11,21 +11,21 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE41)
 #include <smmintrin.h>
 #include <stdlib.h>  // for abs()
 
-#include "./common_sse2.h"
-#include "../enc/vp8i_enc.h"
+#include "src/dsp/common_sse2.h"
+#include "src/enc/vp8i_enc.h"
 
 //------------------------------------------------------------------------------
 // Compute susceptibility based on DCT-coeff histograms.
 
-static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
-                             int start_block, int end_block,
-                             VP8Histogram* const histo) {
+static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
+                                   int start_block, int end_block,
+                                   VP8Histogram* const histo) {
   const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
   int j;
   int distribution[MAX_COEFF_THRESH + 1] = { 0 };
@@ -70,8 +70,8 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
 // Hadamard transform
 // Returns the weighted sum of the absolute value of transformed coefficients.
 // w[] contains a row-major 4 by 4 symmetric matrix.
-static int TTransform(const uint8_t* inA, const uint8_t* inB,
-                      const uint16_t* const w) {
+static int TTransform_SSE41(const uint8_t* inA, const uint8_t* inB,
+                            const uint16_t* const w) {
   int32_t sum[4];
   __m128i tmp_0, tmp_1, tmp_2, tmp_3;
 
@@ -168,19 +168,19 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
   return sum[0] + sum[1] + sum[2] + sum[3];
 }
 
-static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
-                    const uint16_t* const w) {
-  const int diff_sum = TTransform(a, b, w);
+static int Disto4x4_SSE41(const uint8_t* const a, const uint8_t* const b,
+                          const uint16_t* const w) {
+  const int diff_sum = TTransform_SSE41(a, b, w);
   return abs(diff_sum) >> 5;
 }
 
-static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
-                      const uint16_t* const w) {
+static int Disto16x16_SSE41(const uint8_t* const a, const uint8_t* const b,
+                            const uint16_t* const w) {
   int D = 0;
   int x, y;
   for (y = 0; y < 16 * BPS; y += 4 * BPS) {
     for (x = 0; x < 16; x += 4) {
-      D += Disto4x4(a + x + y, b + x + y, w);
+      D += Disto4x4_SSE41(a + x + y, b + x + y, w);
     }
   }
   return D;
@@ -197,9 +197,9 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
                2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, \
                2 * (B) + 1, 2 * (B) + 0, 2 * (A) + 1, 2 * (A) + 0)
 
-static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
-                                       const uint16_t* const sharpen,
-                                       const VP8Matrix* const mtx) {
+static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
+                                             const uint16_t* const sharpen,
+                                             const VP8Matrix* const mtx) {
   const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
   const __m128i zero = _mm_setzero_si128();
   __m128i out0, out8;
@@ -300,22 +300,22 @@ static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
 
 #undef PSHUFB_CST
 
-static int QuantizeBlock(int16_t in[16], int16_t out[16],
-                         const VP8Matrix* const mtx) {
-  return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
+static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
+                               const VP8Matrix* const mtx) {
+  return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
 }
 
-static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
-                            const VP8Matrix* const mtx) {
-  return DoQuantizeBlock(in, out, NULL, mtx);
+static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
+                                  const VP8Matrix* const mtx) {
+  return DoQuantizeBlock_SSE41(in, out, NULL, mtx);
 }
 
-static int Quantize2Blocks(int16_t in[32], int16_t out[32],
-                           const VP8Matrix* const mtx) {
+static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
+                                 const VP8Matrix* const mtx) {
   int nz;
   const uint16_t* const sharpen = &mtx->sharpen_[0];
-  nz  = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
-  nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
+  nz  = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
+  nz |= DoQuantizeBlock_SSE41(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
   return nz;
 }
 
@@ -324,12 +324,12 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
 
 extern void VP8EncDspInitSSE41(void);
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE41(void) {
-  VP8CollectHistogram = CollectHistogram;
-  VP8EncQuantizeBlock = QuantizeBlock;
-  VP8EncQuantize2Blocks = Quantize2Blocks;
-  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
-  VP8TDisto4x4 = Disto4x4;
-  VP8TDisto16x16 = Disto16x16;
+  VP8CollectHistogram = CollectHistogram_SSE41;
+  VP8EncQuantizeBlock = QuantizeBlock_SSE41;
+  VP8EncQuantize2Blocks = Quantize2Blocks_SSE41;
+  VP8EncQuantizeBlockWHT = QuantizeBlockWHT_SSE41;
+  VP8TDisto4x4 = Disto4x4_SSE41;
+  VP8TDisto16x16 = Disto16x16_SSE41;
 }
 
 #else  // !WEBP_USE_SSE41
diff --git a/thirdparty/libwebp/dsp/filters.c b/thirdparty/libwebp/src/dsp/filters.c
index 65f34aad1f..ca5f877da7 100644
--- a/thirdparty/libwebp/dsp/filters.c
+++ b/thirdparty/libwebp/src/dsp/filters.c
@@ -11,7 +11,7 @@
 //
 // Author: Urvang (urvang@google.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
@@ -20,16 +20,17 @@
 // Helpful macro.
 
 # define SANITY_CHECK(in, out)                                                 \
-  assert(in != NULL);                                                          \
-  assert(out != NULL);                                                         \
+  assert((in) != NULL);                                                        \
+  assert((out) != NULL);                                                       \
   assert(width > 0);                                                           \
   assert(height > 0);                                                          \
   assert(stride >= width);                                                     \
   assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
   (void)height;  // Silence unused warning.
 
-static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
-                                    uint8_t* dst, int length, int inverse) {
+#if !WEBP_NEON_OMIT_C_CODE
+static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
+                                      uint8_t* dst, int length, int inverse) {
   int i;
   if (inverse) {
     for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
@@ -41,10 +42,10 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
 //------------------------------------------------------------------------------
 // Horizontal filter.
 
-static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
-                                           int width, int height, int stride,
-                                           int row, int num_rows,
-                                           int inverse, uint8_t* out) {
+static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
+                                             int width, int height, int stride,
+                                             int row, int num_rows,
+                                             int inverse, uint8_t* out) {
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
@@ -56,7 +57,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
   if (row == 0) {
     // Leftmost pixel is the same as input for topmost scanline.
     out[0] = in[0];
-    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
     row = 1;
     preds += stride;
     in += stride;
@@ -66,8 +67,8 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
   // Filter line-by-line.
   while (row < last_row) {
     // Leftmost pixel is predicted from above.
-    PredictLine(in, preds - stride, out, 1, inverse);
-    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    PredictLine_C(in, preds - stride, out, 1, inverse);
+    PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
     ++row;
     preds += stride;
     in += stride;
@@ -78,10 +79,10 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
 //------------------------------------------------------------------------------
 // Vertical filter.
 
-static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
-                                         int width, int height, int stride,
-                                         int row, int num_rows,
-                                         int inverse, uint8_t* out) {
+static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
+                                           int width, int height, int stride,
+                                           int row, int num_rows,
+                                           int inverse, uint8_t* out) {
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
@@ -94,7 +95,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
     // Very first top-left pixel is copied.
     out[0] = in[0];
     // Rest of top scan-line is left-predicted.
-    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
     row = 1;
     in += stride;
     out += stride;
@@ -105,26 +106,28 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
 
   // Filter line-by-line.
   while (row < last_row) {
-    PredictLine(in, preds, out, width, inverse);
+    PredictLine_C(in, preds, out, width, inverse);
     ++row;
     preds += stride;
     in += stride;
     out += stride;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // Gradient filter.
 
-static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
+static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
   const int g = a + b - c;
   return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255;  // clip to 8bit
 }
 
-static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
-                                         int width, int height, int stride,
-                                         int row, int num_rows,
-                                         int inverse, uint8_t* out) {
+#if !WEBP_NEON_OMIT_C_CODE
+static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
+                                           int width, int height, int stride,
+                                           int row, int num_rows,
+                                           int inverse, uint8_t* out) {
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
@@ -136,7 +139,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
   // left prediction for top scan-line
   if (row == 0) {
     out[0] = in[0];
-    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
     row = 1;
     preds += stride;
     in += stride;
@@ -147,11 +150,11 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
   while (row < last_row) {
     int w;
     // leftmost pixel: predict from above.
-    PredictLine(in, preds - stride, out, 1, inverse);
+    PredictLine_C(in, preds - stride, out, 1, inverse);
     for (w = 1; w < width; ++w) {
-      const int pred = GradientPredictor(preds[w - 1],
-                                         preds[w - stride],
-                                         preds[w - stride - 1]);
+      const int pred = GradientPredictor_C(preds[w - 1],
+                                           preds[w - stride],
+                                           preds[w - stride - 1]);
       out[w] = in[w] + (inverse ? pred : -pred);
     }
     ++row;
@@ -160,32 +163,34 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
     out += stride;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 #undef SANITY_CHECK
 
 //------------------------------------------------------------------------------
 
-static void HorizontalFilter(const uint8_t* data, int width, int height,
-                             int stride, uint8_t* filtered_data) {
-  DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
+#if !WEBP_NEON_OMIT_C_CODE
+static void HorizontalFilter_C(const uint8_t* data, int width, int height,
+                               int stride, uint8_t* filtered_data) {
+  DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
+                       filtered_data);
 }
 
-static void VerticalFilter(const uint8_t* data, int width, int height,
-                           int stride, uint8_t* filtered_data) {
-  DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
+static void VerticalFilter_C(const uint8_t* data, int width, int height,
+                             int stride, uint8_t* filtered_data) {
+  DoVerticalFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
 }
 
-
-static void GradientFilter(const uint8_t* data, int width, int height,
-                           int stride, uint8_t* filtered_data) {
-  DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
+static void GradientFilter_C(const uint8_t* data, int width, int height,
+                             int stride, uint8_t* filtered_data) {
+  DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
 }
-
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 
-static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
-                               uint8_t* out, int width) {
+static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
+                                 uint8_t* out, int width) {
   uint8_t pred = (prev == NULL) ? 0 : prev[0];
   int i;
   for (i = 0; i < width; ++i) {
@@ -194,26 +199,28 @@ static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
   }
 }
 
-static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
-                             uint8_t* out, int width) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in,
+                               uint8_t* out, int width) {
   if (prev == NULL) {
-    HorizontalUnfilter(NULL, in, out, width);
+    HorizontalUnfilter_C(NULL, in, out, width);
   } else {
     int i;
     for (i = 0; i < width; ++i) out[i] = prev[i] + in[i];
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
-static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
-                             uint8_t* out, int width) {
+static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
+                               uint8_t* out, int width) {
   if (prev == NULL) {
-    HorizontalUnfilter(NULL, in, out, width);
+    HorizontalUnfilter_C(NULL, in, out, width);
   } else {
     uint8_t top = prev[0], top_left = top, left = top;
     int i;
     for (i = 0; i < width; ++i) {
       top = prev[i];  // need to read this first, in case prev==out
-      left = in[i] + GradientPredictor(left, top, top_left);
+      left = in[i] + GradientPredictor_C(left, top, top_left);
       top_left = top;
       out[i] = left;
     }
@@ -238,14 +245,18 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
   if (filters_last_cpuinfo_used == VP8GetCPUInfo) return;
 
   WebPUnfilters[WEBP_FILTER_NONE] = NULL;
-  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
-  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
-  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
+#if !WEBP_NEON_OMIT_C_CODE
+  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
+  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
+#endif
+  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_C;
 
   WebPFilters[WEBP_FILTER_NONE] = NULL;
-  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
-  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
-  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+#if !WEBP_NEON_OMIT_C_CODE
+  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_C;
+  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_C;
+  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_C;
+#endif
 
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
@@ -253,11 +264,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
       VP8FiltersInitSSE2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8FiltersInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
     if (VP8GetCPUInfo(kMIPSdspR2)) {
       VP8FiltersInitMIPSdspR2();
@@ -269,5 +275,20 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8FiltersInitNEON();
+  }
+#endif
+
+  assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
+  assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
+  assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
+  assert(WebPFilters[WEBP_FILTER_HORIZONTAL] != NULL);
+  assert(WebPFilters[WEBP_FILTER_VERTICAL] != NULL);
+  assert(WebPFilters[WEBP_FILTER_GRADIENT] != NULL);
+
   filters_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/thirdparty/libwebp/dsp/filters_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/filters_mips_dsp_r2.c
index 1d82e3c2e1..9382b12823 100644
--- a/thirdparty/libwebp/dsp/filters_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/filters_mips_dsp_r2.c
@@ -12,11 +12,11 @@
 // Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
 //            Djordje Pesut (djordje.pesut@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS_DSP_R2)
 
-#include "../dsp/dsp.h"
+#include "src/dsp/dsp.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
@@ -101,8 +101,8 @@
     );                                                                         \
   } while (0)
 
-static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
-                                    int length) {
+static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
+                                              int length) {
   DO_PREDICT_LINE(src, dst, length, 0);
 }
 
@@ -192,10 +192,11 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
     }                                                                          \
   } while (0)
 
-static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
-                                           int width, int height, int stride,
-                                           int row, int num_rows,
-                                           uint8_t* out) {
+static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
+                                                     int width, int height,
+                                                     int stride,
+                                                     int row, int num_rows,
+                                                     uint8_t* out) {
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
@@ -207,7 +208,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
   if (row == 0) {
     // Leftmost pixel is the same as input for topmost scanline.
     out[0] = in[0];
-    PredictLine(in + 1, out + 1, width - 1);
+    PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
     row = 1;
     preds += stride;
     in += stride;
@@ -219,9 +220,11 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
 }
 #undef FILTER_LINE_BY_LINE
 
-static void HorizontalFilter(const uint8_t* data, int width, int height,
-                             int stride, uint8_t* filtered_data) {
-  DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
+static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
+                                       int width, int height,
+                                       int stride, uint8_t* filtered_data) {
+  DoHorizontalFilter_MIPSdspR2(data, width, height, stride, 0, height,
+                               filtered_data);
 }
 
 //------------------------------------------------------------------------------
@@ -237,9 +240,11 @@ static void HorizontalFilter(const uint8_t* data, int width, int height,
     }                                                                          \
   } while (0)
 
-static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
-                                         int width, int height, int stride,
-                                         int row, int num_rows, uint8_t* out) {
+static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
+                                                   int width, int height,
+                                                   int stride,
+                                                   int row, int num_rows,
+                                                   uint8_t* out) {
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
@@ -252,7 +257,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
     // Very first top-left pixel is copied.
     out[0] = in[0];
     // Rest of top scan-line is left-predicted.
-    PredictLine(in + 1, out + 1, width - 1);
+    PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
     row = 1;
     in += stride;
     out += stride;
@@ -266,15 +271,16 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
 }
 #undef FILTER_LINE_BY_LINE
 
-static void VerticalFilter(const uint8_t* data, int width, int height,
-                           int stride, uint8_t* filtered_data) {
-  DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
+static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
+                                     int stride, uint8_t* filtered_data) {
+  DoVerticalFilter_MIPSdspR2(data, width, height, stride, 0, height,
+                             filtered_data);
 }
 
 //------------------------------------------------------------------------------
 // Gradient filter.
 
-static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
+static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
   int temp0;
   __asm__ volatile (
     "addu             %[temp0],   %[a],       %[b]        \n\t"
@@ -293,9 +299,9 @@ static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
       int w;                                                                   \
       PREDICT_LINE_ONE_PASS(in, PREDS - stride, out);                          \
       for (w = 1; w < width; ++w) {                                            \
-        const int pred = GradientPredictor(PREDS[w - 1],                       \
-                                           PREDS[w - stride],                  \
-                                           PREDS[w - stride - 1]);             \
+        const int pred = GradientPredictor_MIPSdspR2(PREDS[w - 1],             \
+                                                     PREDS[w - stride],        \
+                                                     PREDS[w - stride - 1]);   \
         out[w] = in[w] OPERATION pred;                                         \
       }                                                                        \
       ++row;                                                                   \
@@ -304,9 +310,9 @@ static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
     }                                                                          \
   } while (0)
 
-static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
-                                         int width, int height, int stride,
-                                         int row, int num_rows, uint8_t* out) {
+static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
+                                       int width, int height, int stride,
+                                       int row, int num_rows, uint8_t* out) {
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
@@ -318,7 +324,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
   // left prediction for top scan-line
   if (row == 0) {
     out[0] = in[0];
-    PredictLine(in + 1, out + 1, width - 1);
+    PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
     row = 1;
     preds += stride;
     in += stride;
@@ -330,38 +336,39 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
 }
 #undef FILTER_LINE_BY_LINE
 
-static void GradientFilter(const uint8_t* data, int width, int height,
-                           int stride, uint8_t* filtered_data) {
-  DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
+static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
+                                     int stride, uint8_t* filtered_data) {
+  DoGradientFilter_MIPSdspR2(data, width, height, stride, 0, height,
+                             filtered_data);
 }
 
 //------------------------------------------------------------------------------
 
-static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
-                               uint8_t* out, int width) {
+static void HorizontalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
+                                         uint8_t* out, int width) {
  out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
  DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
 }
 
-static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
-                             uint8_t* out, int width) {
+static void VerticalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
+                                       uint8_t* out, int width) {
   if (prev == NULL) {
-    HorizontalUnfilter(NULL, in, out, width);
+    HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
   } else {
     DO_PREDICT_LINE_VERTICAL(in, prev, out, width, 1);
   }
 }
 
-static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
-                             uint8_t* out, int width) {
+static void GradientUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
+                                       uint8_t* out, int width) {
   if (prev == NULL) {
-    HorizontalUnfilter(NULL, in, out, width);
+    HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
   } else {
     uint8_t top = prev[0], top_left = top, left = top;
     int i;
     for (i = 0; i < width; ++i) {
       top = prev[i];  // need to read this first, in case prev==dst
-      left = in[i] + GradientPredictor(left, top, top_left);
+      left = in[i] + GradientPredictor_MIPSdspR2(left, top, top_left);
       top_left = top;
       out[i] = left;
     }
@@ -379,13 +386,13 @@ static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
 extern void VP8FiltersInitMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMIPSdspR2(void) {
-  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
-  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
-  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
+  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_MIPSdspR2;
+  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_MIPSdspR2;
+  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_MIPSdspR2;
 
-  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
-  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
-  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MIPSdspR2;
+  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MIPSdspR2;
+  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MIPSdspR2;
 }
 
 #else  // !WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/dsp/filters_msa.c b/thirdparty/libwebp/src/dsp/filters_msa.c
index 4b8922d0bc..14c437d141 100644
--- a/thirdparty/libwebp/dsp/filters_msa.c
+++ b/thirdparty/libwebp/src/dsp/filters_msa.c
@@ -11,11 +11,11 @@
 //
 // Author: Prashant Patil (prashant.patil@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MSA)
 
-#include "./msa_macro.h"
+#include "src/dsp/msa_macro.h"
 
 #include <assert.h>
 
@@ -66,8 +66,8 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
 //------------------------------------------------------------------------------
 // Horrizontal filter
 
-static void HorizontalFilter(const uint8_t* data, int width, int height,
-                             int stride, uint8_t* filtered_data) {
+static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
+                                 int stride, uint8_t* filtered_data) {
   const uint8_t* preds = data;
   const uint8_t* in = data;
   uint8_t* out = filtered_data;
@@ -129,8 +129,8 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
 }
 
 
-static void GradientFilter(const uint8_t* data, int width, int height,
-                           int stride, uint8_t* filtered_data) {
+static void GradientFilter_MSA(const uint8_t* data, int width, int height,
+                               int stride, uint8_t* filtered_data) {
   const uint8_t* in = data;
   const uint8_t* preds = data;
   uint8_t* out = filtered_data;
@@ -157,8 +157,8 @@ static void GradientFilter(const uint8_t* data, int width, int height,
 //------------------------------------------------------------------------------
 // Vertical filter
 
-static void VerticalFilter(const uint8_t* data, int width, int height,
-                           int stride, uint8_t* filtered_data) {
+static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
+                               int stride, uint8_t* filtered_data) {
   const uint8_t* in = data;
   const uint8_t* preds = data;
   uint8_t* out = filtered_data;
@@ -190,9 +190,9 @@ static void VerticalFilter(const uint8_t* data, int width, int height,
 extern void VP8FiltersInitMSA(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMSA(void) {
-  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
-  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
-  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MSA;
+  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MSA;
+  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MSA;
 }
 
 #else  // !WEBP_USE_MSA
diff --git a/thirdparty/libwebp/dsp/filters_neon.c b/thirdparty/libwebp/src/dsp/filters_neon.c
index 4d6e50cc76..3e6a578ea7 100644
--- a/thirdparty/libwebp/dsp/filters_neon.c
+++ b/thirdparty/libwebp/src/dsp/filters_neon.c
@@ -11,12 +11,12 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_NEON)
 
 #include <assert.h>
-#include "./neon.h"
+#include "src/dsp/neon.h"
 
 //------------------------------------------------------------------------------
 // Helpful macros.
@@ -134,7 +134,7 @@ static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
 }
 
 static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
-                               int stride, uint8_t* filtered_data) {
+                                int stride, uint8_t* filtered_data) {
   DoVerticalFilter_NEON(data, width, height, stride, 0, height,
                         filtered_data);
 }
@@ -196,7 +196,7 @@ static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
 }
 
 static void GradientFilter_NEON(const uint8_t* data, int width, int height,
-                               int stride, uint8_t* filtered_data) {
+                                int stride, uint8_t* filtered_data) {
   DoGradientFilter_NEON(data, width, height, stride, 0, height,
                         filtered_data);
 }
@@ -251,9 +251,11 @@ static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
 // GradientUnfilter_NEON is correct but slower than the C-version,
 // at least on ARM64. For armv7, it's a wash.
 // So best is to disable it for now, but keep the idea around...
-// #define USE_GRADIENT_UNFILTER
+#if !defined(USE_GRADIENT_UNFILTER)
+#define USE_GRADIENT_UNFILTER 0   // ALTERNATE_CODE
+#endif
 
-#if defined(USE_GRADIENT_UNFILTER)
+#if (USE_GRADIENT_UNFILTER == 1)
 #define GRAD_PROCESS_LANE(L)  do {                                             \
   const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1);  /* rotate predictor in */   \
   const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1));                       \
@@ -292,7 +294,7 @@ static void GradientPredictInverse_NEON(const uint8_t* const in,
 #undef GRAD_PROCESS_LANE
 
 static void GradientUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
-                                 uint8_t* out, int width) {
+                                  uint8_t* out, int width) {
   if (prev == NULL) {
     HorizontalUnfilter_NEON(NULL, in, out, width);
   } else {
@@ -311,7 +313,7 @@ extern void VP8FiltersInitNEON(void);
 WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitNEON(void) {
   WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_NEON;
   WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_NEON;
-#if defined(USE_GRADIENT_UNFILTER)
+#if (USE_GRADIENT_UNFILTER == 1)
   WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_NEON;
 #endif
 
diff --git a/thirdparty/libwebp/dsp/filters_sse2.c b/thirdparty/libwebp/src/dsp/filters_sse2.c
index 67f77999e6..5a18895676 100644
--- a/thirdparty/libwebp/dsp/filters_sse2.c
+++ b/thirdparty/libwebp/src/dsp/filters_sse2.c
@@ -11,7 +11,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE2)
 
@@ -24,16 +24,16 @@
 // Helpful macro.
 
 # define SANITY_CHECK(in, out)                                                 \
-  assert(in != NULL);                                                          \
-  assert(out != NULL);                                                         \
+  assert((in) != NULL);                                                        \
+  assert((out) != NULL);                                                       \
   assert(width > 0);                                                           \
   assert(height > 0);                                                          \
   assert(stride >= width);                                                     \
   assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
   (void)height;  // Silence unused warning.
 
-static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
-                           uint8_t* dst, int length) {
+static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
+                                uint8_t* dst, int length) {
   int i;
   const int max_pos = length & ~31;
   assert(length >= 0);
@@ -51,7 +51,7 @@ static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
 }
 
 // Special case for left-based prediction (when preds==dst-1 or preds==src-1).
-static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
+static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
   int i;
   const int max_pos = length & ~31;
   assert(length >= 0);
@@ -71,10 +71,11 @@ static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
 //------------------------------------------------------------------------------
 // Horizontal filter.
 
-static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
-                                           int width, int height, int stride,
-                                           int row, int num_rows,
-                                           uint8_t* out) {
+static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
+                                                int width, int height,
+                                                int stride,
+                                                int row, int num_rows,
+                                                uint8_t* out) {
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
   SANITY_CHECK(in, out);
@@ -84,7 +85,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
   if (row == 0) {
     // Leftmost pixel is the same as input for topmost scanline.
     out[0] = in[0];
-    PredictLineLeft(in + 1, out + 1, width - 1);
+    PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
     row = 1;
     in += stride;
     out += stride;
@@ -94,7 +95,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
   while (row < last_row) {
     // Leftmost pixel is predicted from above.
     out[0] = in[0] - in[-stride];
-    PredictLineLeft(in + 1, out + 1, width - 1);
+    PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
     ++row;
     in += stride;
     out += stride;
@@ -104,9 +105,10 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
 //------------------------------------------------------------------------------
 // Vertical filter.
 
-static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
-                                         int width, int height, int stride,
-                                         int row, int num_rows, uint8_t* out) {
+static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
+                                              int width, int height, int stride,
+                                              int row, int num_rows,
+                                              uint8_t* out) {
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
   SANITY_CHECK(in, out);
@@ -117,7 +119,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
     // Very first top-left pixel is copied.
     out[0] = in[0];
     // Rest of top scan-line is left-predicted.
-    PredictLineLeft(in + 1, out + 1, width - 1);
+    PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
     row = 1;
     in += stride;
     out += stride;
@@ -125,7 +127,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
 
   // Filter line-by-line.
   while (row < last_row) {
-    PredictLineTop(in, in - stride, out, width);
+    PredictLineTop_SSE2(in, in - stride, out, width);
     ++row;
     in += stride;
     out += stride;
@@ -135,14 +137,14 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
 //------------------------------------------------------------------------------
 // Gradient filter.
 
-static WEBP_INLINE int GradientPredictorC(uint8_t a, uint8_t b, uint8_t c) {
+static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
   const int g = a + b - c;
   return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255;  // clip to 8bit
 }
 
-static void GradientPredictDirect(const uint8_t* const row,
-                                  const uint8_t* const top,
-                                  uint8_t* const out, int length) {
+static void GradientPredictDirect_SSE2(const uint8_t* const row,
+                                       const uint8_t* const top,
+                                       uint8_t* const out, int length) {
   const int max_pos = length & ~7;
   int i;
   const __m128i zero = _mm_setzero_si128();
@@ -161,14 +163,14 @@ static void GradientPredictDirect(const uint8_t* const row,
     _mm_storel_epi64((__m128i*)(out + i), H);
   }
   for (; i < length; ++i) {
-    out[i] = row[i] - GradientPredictorC(row[i - 1], top[i], top[i - 1]);
+    out[i] = row[i] - GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
   }
 }
 
-static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
-                                         int width, int height, int stride,
-                                         int row, int num_rows,
-                                         uint8_t* out) {
+static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
+                                              int width, int height, int stride,
+                                              int row, int num_rows,
+                                              uint8_t* out) {
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
   SANITY_CHECK(in, out);
@@ -178,7 +180,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
   // left prediction for top scan-line
   if (row == 0) {
     out[0] = in[0];
-    PredictLineLeft(in + 1, out + 1, width - 1);
+    PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
     row = 1;
     in += stride;
     out += stride;
@@ -187,7 +189,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
   // Filter line-by-line.
   while (row < last_row) {
     out[0] = in[0] - in[-stride];
-    GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);
+    GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
     ++row;
     in += stride;
     out += stride;
@@ -198,26 +200,27 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
 
 //------------------------------------------------------------------------------
 
-static void HorizontalFilter(const uint8_t* data, int width, int height,
-                             int stride, uint8_t* filtered_data) {
-  DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
+static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
+                                  int stride, uint8_t* filtered_data) {
+  DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
+                          filtered_data);
 }
 
-static void VerticalFilter(const uint8_t* data, int width, int height,
-                           int stride, uint8_t* filtered_data) {
-  DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
+static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
+                                int stride, uint8_t* filtered_data) {
+  DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
 }
 
-static void GradientFilter(const uint8_t* data, int width, int height,
-                           int stride, uint8_t* filtered_data) {
-  DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
+static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
+                                int stride, uint8_t* filtered_data) {
+  DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
 }
 
 //------------------------------------------------------------------------------
 // Inverse transforms
 
-static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
-                               uint8_t* out, int width) {
+static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
+                                    uint8_t* out, int width) {
   int i;
   __m128i last;
   out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
@@ -238,10 +241,10 @@ static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
   for (; i < width; ++i) out[i] = in[i] + out[i - 1];
 }
 
-static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
-                             uint8_t* out, int width) {
+static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
+                                  uint8_t* out, int width) {
   if (prev == NULL) {
-    HorizontalUnfilter(NULL, in, out, width);
+    HorizontalUnfilter_SSE2(NULL, in, out, width);
   } else {
     int i;
     const int max_pos = width & ~31;
@@ -260,9 +263,9 @@ static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
   }
 }
 
-static void GradientPredictInverse(const uint8_t* const in,
-                                   const uint8_t* const top,
-                                   uint8_t* const row, int length) {
+static void GradientPredictInverse_SSE2(const uint8_t* const in,
+                                        const uint8_t* const top,
+                                        uint8_t* const row, int length) {
   if (length > 0) {
     int i;
     const int max_pos = length & ~7;
@@ -293,18 +296,18 @@ static void GradientPredictInverse(const uint8_t* const in,
       _mm_storel_epi64((__m128i*)&row[i], out);
     }
     for (; i < length; ++i) {
-      row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]);
+      row[i] = in[i] + GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
     }
   }
 }
 
-static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
-                             uint8_t* out, int width) {
+static void GradientUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
+                                  uint8_t* out, int width) {
   if (prev == NULL) {
-    HorizontalUnfilter(NULL, in, out, width);
+    HorizontalUnfilter_SSE2(NULL, in, out, width);
   } else {
     out[0] = in[0] + prev[0];  // predict from above
-    GradientPredictInverse(in + 1, prev + 1, out + 1, width - 1);
+    GradientPredictInverse_SSE2(in + 1, prev + 1, out + 1, width - 1);
   }
 }
 
@@ -314,13 +317,13 @@ static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
 extern void VP8FiltersInitSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
-  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
-  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
-  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
+  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2;
+  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2;
+  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2;
 
-  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
-  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
-  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2;
+  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_SSE2;
+  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_SSE2;
 }
 
 #else  // !WEBP_USE_SSE2
diff --git a/thirdparty/libwebp/dsp/lossless.c b/thirdparty/libwebp/src/dsp/lossless.c
index 20d18f6ecd..83f553d9ad 100644
--- a/thirdparty/libwebp/dsp/lossless.c
+++ b/thirdparty/libwebp/src/dsp/lossless.c
@@ -13,14 +13,15 @@
 //          Jyrki Alakuijala (jyrki@google.com)
 //          Urvang Joshi (urvang@google.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
+#include <assert.h>
 #include <math.h>
 #include <stdlib.h>
-#include "../dec/vp8li_dec.h"
-#include "../utils/endian_inl_utils.h"
-#include "./lossless.h"
-#include "./lossless_common.h"
+#include "src/dec/vp8li_dec.h"
+#include "src/utils/endian_inl_utils.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
 
 #define MAX_DIFF_COST (1e30f)
 
@@ -80,8 +81,9 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
 }
 
-// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
-#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
+// gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
+// inlined.
+#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
 # define LOCAL_INLINE __attribute__ ((noinline))
 #else
 # define LOCAL_INLINE WEBP_INLINE
@@ -107,69 +109,69 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
 //------------------------------------------------------------------------------
 // Predictors
 
-static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) {
   (void)top;
   (void)left;
   return ARGB_BLACK;
 }
-static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) {
   (void)top;
   return left;
 }
-static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) {
   (void)left;
   return top[0];
 }
-static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) {
   (void)left;
   return top[1];
 }
-static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) {
   (void)left;
   return top[-1];
 }
-static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) {
   const uint32_t pred = Average3(left, top[0], top[1]);
   return pred;
 }
-static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) {
   const uint32_t pred = Average2(left, top[-1]);
   return pred;
 }
-static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) {
   const uint32_t pred = Average2(left, top[0]);
   return pred;
 }
-static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) {
   const uint32_t pred = Average2(top[-1], top[0]);
   (void)left;
   return pred;
 }
-static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor9_C(uint32_t left, const uint32_t* const top) {
   const uint32_t pred = Average2(top[0], top[1]);
   (void)left;
   return pred;
 }
-static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) {
   const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
   return pred;
 }
-static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) {
   const uint32_t pred = Select(top[0], left, top[-1]);
   return pred;
 }
-static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) {
   const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
   return pred;
 }
-static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
   const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
   return pred;
 }
 
-GENERATE_PREDICTOR_ADD(Predictor0, PredictorAdd0)
-static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
-                          int num_pixels, uint32_t* out) {
+GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
+static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
+                            int num_pixels, uint32_t* out) {
   int i;
   uint32_t left = out[-1];
   for (i = 0; i < num_pixels; ++i) {
@@ -177,29 +179,29 @@ static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
   }
   (void)upper;
 }
-GENERATE_PREDICTOR_ADD(Predictor2, PredictorAdd2)
-GENERATE_PREDICTOR_ADD(Predictor3, PredictorAdd3)
-GENERATE_PREDICTOR_ADD(Predictor4, PredictorAdd4)
-GENERATE_PREDICTOR_ADD(Predictor5, PredictorAdd5)
-GENERATE_PREDICTOR_ADD(Predictor6, PredictorAdd6)
-GENERATE_PREDICTOR_ADD(Predictor7, PredictorAdd7)
-GENERATE_PREDICTOR_ADD(Predictor8, PredictorAdd8)
-GENERATE_PREDICTOR_ADD(Predictor9, PredictorAdd9)
-GENERATE_PREDICTOR_ADD(Predictor10, PredictorAdd10)
-GENERATE_PREDICTOR_ADD(Predictor11, PredictorAdd11)
-GENERATE_PREDICTOR_ADD(Predictor12, PredictorAdd12)
-GENERATE_PREDICTOR_ADD(Predictor13, PredictorAdd13)
+GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
+GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
+GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C)
+GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C)
+GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C)
+GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C)
+GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C)
+GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C)
+GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C)
+GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C)
+GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C)
+GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C)
 
 //------------------------------------------------------------------------------
 
 // Inverse prediction.
-static void PredictorInverseTransform(const VP8LTransform* const transform,
-                                      int y_start, int y_end,
-                                      const uint32_t* in, uint32_t* out) {
+static void PredictorInverseTransform_C(const VP8LTransform* const transform,
+                                        int y_start, int y_end,
+                                        const uint32_t* in, uint32_t* out) {
   const int width = transform->xsize_;
   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
-    PredictorAdd0(in, NULL, 1, out);
-    PredictorAdd1(in + 1, NULL, width - 1, out + 1);
+    PredictorAdd0_C(in, NULL, 1, out);
+    PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
     in += width;
     out += width;
     ++y_start;
@@ -217,7 +219,7 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
       const uint32_t* pred_mode_src = pred_mode_base;
       int x = 1;
       // First pixel follows the T (mode=2) mode.
-      PredictorAdd2(in, out - width, 1, out);
+      PredictorAdd2_C(in, out - width, 1, out);
       // .. the rest:
       while (x < width) {
         const VP8LPredictorAddSubFunc pred_func =
@@ -272,8 +274,8 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
     const uint32_t argb = src[i];
     const uint32_t green = argb >> 8;
     const uint32_t red = argb >> 16;
-    int new_red = red;
-    int new_blue = argb;
+    int new_red = red & 0xff;
+    int new_blue = argb & 0xff;
     new_red += ColorTransformDelta(m->green_to_red_, green);
     new_red &= 0xff;
     new_blue += ColorTransformDelta(m->green_to_blue_, green);
@@ -284,9 +286,9 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
 }
 
 // Color space inverse transform.
-static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
-                                       int y_start, int y_end,
-                                       const uint32_t* src, uint32_t* dst) {
+static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
+                                         int y_start, int y_end,
+                                         const uint32_t* src, uint32_t* dst) {
   const int width = transform->xsize_;
   const int tile_width = 1 << transform->bits_;
   const int mask = tile_width - 1;
@@ -362,10 +364,10 @@ STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
   }                                                                            \
 }
 
-COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
-                    VP8GetARGBIndex, VP8GetARGBValue)
-COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
-                    8b, VP8GetAlphaIndex, VP8GetAlphaValue)
+COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
+                    uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
+COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
+                    uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)
 
 #undef COLOR_INDEX_INVERSE
 
@@ -380,7 +382,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
       VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
       break;
     case PREDICTOR_TRANSFORM:
-      PredictorInverseTransform(transform, row_start, row_end, in, out);
+      PredictorInverseTransform_C(transform, row_start, row_end, in, out);
       if (row_end != transform->ysize_) {
         // The last predicted row in this iteration will be the top-pred row
         // for the first row in next iteration.
@@ -389,7 +391,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
       }
       break;
     case CROSS_COLOR_TRANSFORM:
-      ColorSpaceInverseTransform(transform, row_start, row_end, in, out);
+      ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
       break;
     case COLOR_INDEXING_TRANSFORM:
       if (in == out && transform->bits_ > 0) {
@@ -403,9 +405,9 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
             VP8LSubSampleSize(transform->xsize_, transform->bits_);
         uint32_t* const src = out + out_stride - in_stride;
         memmove(src, out, in_stride * sizeof(*src));
-        ColorIndexInverseTransform(transform, row_start, row_end, src, out);
+        ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
       } else {
-        ColorIndexInverseTransform(transform, row_start, row_end, in, out);
+        ColorIndexInverseTransform_C(transform, row_start, row_end, in, out);
       }
       break;
   }
@@ -452,7 +454,7 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
     const uint32_t argb = *src++;
     const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
     const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     *dst++ = ba;
     *dst++ = rg;
 #else
@@ -469,7 +471,7 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
     const uint32_t argb = *src++;
     const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
     const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     *dst++ = gb;
     *dst++ = rg;
 #else
@@ -496,22 +498,7 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
     const uint32_t* const src_end = src + num_pixels;
     while (src < src_end) {
       const uint32_t argb = *src++;
-
-#if !defined(WORDS_BIGENDIAN)
-#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
       WebPUint32ToMem(dst, BSwap32(argb));
-#else  // WEBP_REFERENCE_IMPLEMENTATION
-      dst[0] = (argb >> 24) & 0xff;
-      dst[1] = (argb >> 16) & 0xff;
-      dst[2] = (argb >>  8) & 0xff;
-      dst[3] = (argb >>  0) & 0xff;
-#endif
-#else  // WORDS_BIGENDIAN
-      dst[0] = (argb >>  0) & 0xff;
-      dst[1] = (argb >>  8) & 0xff;
-      dst[2] = (argb >> 16) & 0xff;
-      dst[3] = (argb >> 24) & 0xff;
-#endif
       dst += sizeof(argb);
     }
   } else {
@@ -593,23 +580,23 @@ extern void VP8LDspInitMSA(void);
 static volatile VP8CPUInfo lossless_last_cpuinfo_used =
     (VP8CPUInfo)&lossless_last_cpuinfo_used;
 
-#define COPY_PREDICTOR_ARRAY(IN, OUT) do {              \
-  (OUT)[0] = IN##0;                                     \
-  (OUT)[1] = IN##1;                                     \
-  (OUT)[2] = IN##2;                                     \
-  (OUT)[3] = IN##3;                                     \
-  (OUT)[4] = IN##4;                                     \
-  (OUT)[5] = IN##5;                                     \
-  (OUT)[6] = IN##6;                                     \
-  (OUT)[7] = IN##7;                                     \
-  (OUT)[8] = IN##8;                                     \
-  (OUT)[9] = IN##9;                                     \
-  (OUT)[10] = IN##10;                                   \
-  (OUT)[11] = IN##11;                                   \
-  (OUT)[12] = IN##12;                                   \
-  (OUT)[13] = IN##13;                                   \
-  (OUT)[14] = IN##0; /* <- padding security sentinels*/ \
-  (OUT)[15] = IN##0;                                    \
+#define COPY_PREDICTOR_ARRAY(IN, OUT) do {                \
+  (OUT)[0] = IN##0_C;                                     \
+  (OUT)[1] = IN##1_C;                                     \
+  (OUT)[2] = IN##2_C;                                     \
+  (OUT)[3] = IN##3_C;                                     \
+  (OUT)[4] = IN##4_C;                                     \
+  (OUT)[5] = IN##5_C;                                     \
+  (OUT)[6] = IN##6_C;                                     \
+  (OUT)[7] = IN##7_C;                                     \
+  (OUT)[8] = IN##8_C;                                     \
+  (OUT)[9] = IN##9_C;                                     \
+  (OUT)[10] = IN##10_C;                                   \
+  (OUT)[11] = IN##11_C;                                   \
+  (OUT)[12] = IN##12_C;                                   \
+  (OUT)[13] = IN##13_C;                                   \
+  (OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \
+  (OUT)[15] = IN##0_C;                                    \
 } while (0);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
@@ -620,18 +607,21 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
 
+#if !WEBP_NEON_OMIT_C_CODE
   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
 
   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
 
-  VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
+  VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
+  VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
+#endif
+
   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
-  VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
 
-  VP8LMapColor32b = MapARGB;
-  VP8LMapColor8b = MapAlpha;
+  VP8LMapColor32b = MapARGB_C;
+  VP8LMapColor8b = MapAlpha_C;
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
@@ -640,11 +630,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
       VP8LDspInitSSE2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8LDspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
     if (VP8GetCPUInfo(kMIPSdspR2)) {
       VP8LDspInitMIPSdspR2();
@@ -656,6 +641,24 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8LDspInitNEON();
+  }
+#endif
+
+  assert(VP8LAddGreenToBlueAndRed != NULL);
+  assert(VP8LTransformColorInverse != NULL);
+  assert(VP8LConvertBGRAToRGBA != NULL);
+  assert(VP8LConvertBGRAToRGB != NULL);
+  assert(VP8LConvertBGRAToBGR != NULL);
+  assert(VP8LConvertBGRAToRGBA4444 != NULL);
+  assert(VP8LConvertBGRAToRGB565 != NULL);
+  assert(VP8LMapColor32b != NULL);
+  assert(VP8LMapColor8b != NULL);
+
   lossless_last_cpuinfo_used = VP8GetCPUInfo;
 }
 #undef COPY_PREDICTOR_ARRAY
diff --git a/thirdparty/libwebp/dsp/lossless.h b/thirdparty/libwebp/src/dsp/lossless.h
index 352a54e509..a99dbda686 100644
--- a/thirdparty/libwebp/dsp/lossless.h
+++ b/thirdparty/libwebp/src/dsp/lossless.h
@@ -15,18 +15,18 @@
 #ifndef WEBP_DSP_LOSSLESS_H_
 #define WEBP_DSP_LOSSLESS_H_
 
-#include "../webp/types.h"
-#include "../webp/decode.h"
+#include "src/webp/types.h"
+#include "src/webp/decode.h"
 
-#include "../enc/histogram_enc.h"
-#include "../utils/utils.h"
+#include "src/enc/histogram_enc.h"
+#include "src/utils/utils.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 #ifdef WEBP_EXPERIMENTAL_FEATURES
-#include "../enc/delta_palettization_enc.h"
+#include "src/enc/delta_palettization_enc.h"
 #endif  // WEBP_EXPERIMENTAL_FEATURES
 
 //------------------------------------------------------------------------------
@@ -124,7 +124,7 @@ void VP8LDspInit(void);
 typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
 extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
 typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
-                                       uint32_t* const dst, int num_pixels);
+                                       uint32_t* dst, int num_pixels);
 extern VP8LTransformColorFunc VP8LTransformColor;
 typedef void (*VP8LCollectColorBlueTransformsFunc)(
     const uint32_t* argb, int stride,
diff --git a/thirdparty/libwebp/dsp/lossless_common.h b/thirdparty/libwebp/src/dsp/lossless_common.h
index c40f711208..a2648d1737 100644
--- a/thirdparty/libwebp/dsp/lossless_common.h
+++ b/thirdparty/libwebp/src/dsp/lossless_common.h
@@ -16,9 +16,9 @@
 #ifndef WEBP_DSP_LOSSLESS_COMMON_H_
 #define WEBP_DSP_LOSSLESS_COMMON_H_
 
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
-#include "../utils/utils.h"
+#include "src/utils/utils.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -93,14 +93,6 @@ static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
 // -----------------------------------------------------------------------------
 // PrefixEncode()
 
-static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
-  const int log_floor = BitsLog2Floor(n);
-  if (n == (n & ~(n - 1))) {  // zero or a power of two.
-    return log_floor;
-  }
-  return log_floor + 1;
-}
-
 // Splitting of distance and length codes into prefixes and
 // extra bits. The prefixes are encoded with an entropy code
 // while the extra bits are stored just as normal bits.
diff --git a/thirdparty/libwebp/dsp/lossless_enc.c b/thirdparty/libwebp/src/dsp/lossless_enc.c
index 4e46fbab8b..92ca3c0542 100644
--- a/thirdparty/libwebp/dsp/lossless_enc.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc.c
@@ -13,15 +13,16 @@
 //          Jyrki Alakuijala (jyrki@google.com)
 //          Urvang Joshi (urvang@google.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
+#include <assert.h>
 #include <math.h>
 #include <stdlib.h>
-#include "../dec/vp8li_dec.h"
-#include "../utils/endian_inl_utils.h"
-#include "./lossless.h"
-#include "./lossless_common.h"
-#include "./yuv.h"
+#include "src/dec/vp8li_dec.h"
+#include "src/utils/endian_inl_utils.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
+#include "src/dsp/yuv.h"
 
 // lookup table for small values of log2(int)
 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
@@ -325,7 +326,7 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
   112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
 };
 
-static float FastSLog2Slow(uint32_t v) {
+static float FastSLog2Slow_C(uint32_t v) {
   assert(v >= LOG_LOOKUP_IDX_MAX);
   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
     int log_cnt = 0;
@@ -351,7 +352,7 @@ static float FastSLog2Slow(uint32_t v) {
   }
 }
 
-static float FastLog2Slow(uint32_t v) {
+static float FastLog2Slow_C(uint32_t v) {
   assert(v >= LOG_LOOKUP_IDX_MAX);
   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
     int log_cnt = 0;
@@ -380,7 +381,7 @@ static float FastLog2Slow(uint32_t v) {
 // Methods to calculate Entropy (Shannon).
 
 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
-static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
+static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
   int i;
   double retval = 0.;
   int sumX = 0, sumXY = 0;
@@ -453,9 +454,9 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
   *i_prev = i;
 }
 
-static void GetEntropyUnrefined(const uint32_t X[], int length,
-                                VP8LBitEntropy* const bit_entropy,
-                                VP8LStreaks* const stats) {
+static void GetEntropyUnrefined_C(const uint32_t X[], int length,
+                                  VP8LBitEntropy* const bit_entropy,
+                                  VP8LStreaks* const stats) {
   int i;
   int i_prev = 0;
   uint32_t x_prev = X[0];
@@ -474,10 +475,11 @@ static void GetEntropyUnrefined(const uint32_t X[], int length,
   bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
 }
 
-static void GetCombinedEntropyUnrefined(const uint32_t X[], const uint32_t Y[],
-                                        int length,
-                                        VP8LBitEntropy* const bit_entropy,
-                                        VP8LStreaks* const stats) {
+static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
+                                          const uint32_t Y[],
+                                          int length,
+                                          VP8LBitEntropy* const bit_entropy,
+                                          VP8LStreaks* const stats) {
   int i = 1;
   int i_prev = 0;
   uint32_t xy_prev = X[0] + Y[0];
@@ -520,8 +522,8 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
     const uint32_t argb = data[i];
     const uint32_t green = argb >> 8;
     const uint32_t red = argb >> 16;
-    int new_red = red;
-    int new_blue = argb;
+    int new_red = red & 0xff;
+    int new_blue = argb & 0xff;
     new_red -= ColorTransformDelta(m->green_to_red_, green);
     new_red &= 0xff;
     new_blue -= ColorTransformDelta(m->green_to_blue_, green);
@@ -577,8 +579,8 @@ void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
 
 //------------------------------------------------------------------------------
 
-static int VectorMismatch(const uint32_t* const array1,
-                          const uint32_t* const array2, int length) {
+static int VectorMismatch_C(const uint32_t* const array1,
+                            const uint32_t* const array2, int length) {
   int match_len = 0;
 
   while (match_len < length && array1[match_len] == array2[match_len]) {
@@ -610,15 +612,15 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
 
 //------------------------------------------------------------------------------
 
-static double ExtraCost(const uint32_t* population, int length) {
+static double ExtraCost_C(const uint32_t* population, int length) {
   int i;
   double cost = 0.;
   for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
   return cost;
 }
 
-static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
-                                int length) {
+static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
+                                  int length) {
   int i;
   double cost = 0.;
   for (i = 2; i < length - 2; ++i) {
@@ -630,9 +632,9 @@ static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
 
 //------------------------------------------------------------------------------
 
-static void HistogramAdd(const VP8LHistogram* const a,
-                         const VP8LHistogram* const b,
-                         VP8LHistogram* const out) {
+static void HistogramAdd_C(const VP8LHistogram* const a,
+                           const VP8LHistogram* const b,
+                           VP8LHistogram* const out) {
   int i;
   const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
   assert(a->palette_code_bits_ == b->palette_code_bits_);
@@ -869,26 +871,28 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
 
   VP8LDspInit();
 
+#if !WEBP_NEON_OMIT_C_CODE
   VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
 
   VP8LTransformColor = VP8LTransformColor_C;
+#endif
 
   VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C;
   VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C;
 
-  VP8LFastLog2Slow = FastLog2Slow;
-  VP8LFastSLog2Slow = FastSLog2Slow;
+  VP8LFastLog2Slow = FastLog2Slow_C;
+  VP8LFastSLog2Slow = FastSLog2Slow_C;
 
-  VP8LExtraCost = ExtraCost;
-  VP8LExtraCostCombined = ExtraCostCombined;
-  VP8LCombinedShannonEntropy = CombinedShannonEntropy;
+  VP8LExtraCost = ExtraCost_C;
+  VP8LExtraCostCombined = ExtraCostCombined_C;
+  VP8LCombinedShannonEntropy = CombinedShannonEntropy_C;
 
-  VP8LGetEntropyUnrefined = GetEntropyUnrefined;
-  VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined;
+  VP8LGetEntropyUnrefined = GetEntropyUnrefined_C;
+  VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_C;
 
-  VP8LHistogramAdd = HistogramAdd;
+  VP8LHistogramAdd = HistogramAdd_C;
 
-  VP8LVectorMismatch = VectorMismatch;
+  VP8LVectorMismatch = VectorMismatch_C;
   VP8LBundleColorMap = VP8LBundleColorMap_C;
 
   VP8LPredictorsSub[0] = PredictorSub0_C;
@@ -937,11 +941,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
 #endif
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8LEncDspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS32)
     if (VP8GetCPUInfo(kMIPS32)) {
       VP8LEncDspInitMIPS32();
@@ -958,6 +957,61 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8LEncDspInitNEON();
+  }
+#endif
+
+  assert(VP8LSubtractGreenFromBlueAndRed != NULL);
+  assert(VP8LTransformColor != NULL);
+  assert(VP8LCollectColorBlueTransforms != NULL);
+  assert(VP8LCollectColorRedTransforms != NULL);
+  assert(VP8LFastLog2Slow != NULL);
+  assert(VP8LFastSLog2Slow != NULL);
+  assert(VP8LExtraCost != NULL);
+  assert(VP8LExtraCostCombined != NULL);
+  assert(VP8LCombinedShannonEntropy != NULL);
+  assert(VP8LGetEntropyUnrefined != NULL);
+  assert(VP8LGetCombinedEntropyUnrefined != NULL);
+  assert(VP8LHistogramAdd != NULL);
+  assert(VP8LVectorMismatch != NULL);
+  assert(VP8LBundleColorMap != NULL);
+  assert(VP8LPredictorsSub[0] != NULL);
+  assert(VP8LPredictorsSub[1] != NULL);
+  assert(VP8LPredictorsSub[2] != NULL);
+  assert(VP8LPredictorsSub[3] != NULL);
+  assert(VP8LPredictorsSub[4] != NULL);
+  assert(VP8LPredictorsSub[5] != NULL);
+  assert(VP8LPredictorsSub[6] != NULL);
+  assert(VP8LPredictorsSub[7] != NULL);
+  assert(VP8LPredictorsSub[8] != NULL);
+  assert(VP8LPredictorsSub[9] != NULL);
+  assert(VP8LPredictorsSub[10] != NULL);
+  assert(VP8LPredictorsSub[11] != NULL);
+  assert(VP8LPredictorsSub[12] != NULL);
+  assert(VP8LPredictorsSub[13] != NULL);
+  assert(VP8LPredictorsSub[14] != NULL);
+  assert(VP8LPredictorsSub[15] != NULL);
+  assert(VP8LPredictorsSub_C[0] != NULL);
+  assert(VP8LPredictorsSub_C[1] != NULL);
+  assert(VP8LPredictorsSub_C[2] != NULL);
+  assert(VP8LPredictorsSub_C[3] != NULL);
+  assert(VP8LPredictorsSub_C[4] != NULL);
+  assert(VP8LPredictorsSub_C[5] != NULL);
+  assert(VP8LPredictorsSub_C[6] != NULL);
+  assert(VP8LPredictorsSub_C[7] != NULL);
+  assert(VP8LPredictorsSub_C[8] != NULL);
+  assert(VP8LPredictorsSub_C[9] != NULL);
+  assert(VP8LPredictorsSub_C[10] != NULL);
+  assert(VP8LPredictorsSub_C[11] != NULL);
+  assert(VP8LPredictorsSub_C[12] != NULL);
+  assert(VP8LPredictorsSub_C[13] != NULL);
+  assert(VP8LPredictorsSub_C[14] != NULL);
+  assert(VP8LPredictorsSub_C[15] != NULL);
+
   lossless_enc_last_cpuinfo_used = VP8GetCPUInfo;
 }
 
diff --git a/thirdparty/libwebp/dsp/lossless_enc_mips32.c b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
index 4186b9f50d..e7b58f4e8c 100644
--- a/thirdparty/libwebp/dsp/lossless_enc_mips32.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
@@ -12,9 +12,9 @@
 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
 
-#include "./dsp.h"
-#include "./lossless.h"
-#include "./lossless_common.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
 
 #if defined(WEBP_USE_MIPS32)
 
@@ -23,7 +23,7 @@
 #include <stdlib.h>
 #include <string.h>
 
-static float FastSLog2Slow(uint32_t v) {
+static float FastSLog2Slow_MIPS32(uint32_t v) {
   assert(v >= LOG_LOOKUP_IDX_MAX);
   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
     uint32_t log_cnt, y, correction;
@@ -59,7 +59,7 @@ static float FastSLog2Slow(uint32_t v) {
   }
 }
 
-static float FastLog2Slow(uint32_t v) {
+static float FastLog2Slow_MIPS32(uint32_t v) {
   assert(v >= LOG_LOOKUP_IDX_MAX);
   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
     uint32_t log_cnt, y;
@@ -104,7 +104,7 @@ static float FastLog2Slow(uint32_t v) {
 //     pop += 2;
 //   }
 //   return (double)cost;
-static double ExtraCost(const uint32_t* const population, int length) {
+static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
   int i, temp0, temp1;
   const uint32_t* pop = &population[4];
   const uint32_t* const LoopEnd = &population[length];
@@ -149,8 +149,8 @@ static double ExtraCost(const uint32_t* const population, int length) {
 //     pY += 2;
 //   }
 //   return (double)cost;
-static double ExtraCostCombined(const uint32_t* const X,
-                                const uint32_t* const Y, int length) {
+static double ExtraCostCombined_MIPS32(const uint32_t* const X,
+                                       const uint32_t* const Y, int length) {
   int i, temp0, temp1, temp2, temp3;
   const uint32_t* pX = &X[4];
   const uint32_t* pY = &Y[4];
@@ -241,9 +241,9 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
   *i_prev = i;
 }
 
-static void GetEntropyUnrefined(const uint32_t X[], int length,
-                                VP8LBitEntropy* const bit_entropy,
-                                VP8LStreaks* const stats) {
+static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
+                                       VP8LBitEntropy* const bit_entropy,
+                                       VP8LStreaks* const stats) {
   int i;
   int i_prev = 0;
   uint32_t x_prev = X[0];
@@ -262,26 +262,27 @@ static void GetEntropyUnrefined(const uint32_t X[], int length,
   bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
 }
 
-static void GetCombinedEntropyUnrefined(const uint32_t X[], const uint32_t Y[],
-                                        int length,
-                                        VP8LBitEntropy* const bit_entropy,
-                                        VP8LStreaks* const stats) {
+static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
+                                               const uint32_t Y[],
+                                               int length,
+                                               VP8LBitEntropy* const entropy,
+                                               VP8LStreaks* const stats) {
   int i = 1;
   int i_prev = 0;
   uint32_t xy_prev = X[0] + Y[0];
 
   memset(stats, 0, sizeof(*stats));
-  VP8LBitEntropyInit(bit_entropy);
+  VP8LBitEntropyInit(entropy);
 
   for (i = 1; i < length; ++i) {
     const uint32_t xy = X[i] + Y[i];
     if (xy != xy_prev) {
-      GetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, bit_entropy, stats);
+      GetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, entropy, stats);
     }
   }
-  GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);
+  GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, entropy, stats);
 
-  bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
+  entropy->entropy += VP8LFastSLog2(entropy->sum);
 }
 
 #define ASM_START                                       \
@@ -374,9 +375,9 @@ static void GetCombinedEntropyUnrefined(const uint32_t X[], const uint32_t Y[],
   }                                                     \
 } while (0)
 
-static void HistogramAdd(const VP8LHistogram* const a,
-                         const VP8LHistogram* const b,
-                         VP8LHistogram* const out) {
+static void HistogramAdd_MIPS32(const VP8LHistogram* const a,
+                                const VP8LHistogram* const b,
+                                VP8LHistogram* const out) {
   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
   const int extra_cache_size = VP8LHistogramNumCodes(a->palette_code_bits_)
                              - (NUM_LITERAL_CODES + NUM_LENGTH_CODES);
@@ -415,13 +416,13 @@ static void HistogramAdd(const VP8LHistogram* const a,
 extern void VP8LEncDspInitMIPS32(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
-  VP8LFastSLog2Slow = FastSLog2Slow;
-  VP8LFastLog2Slow = FastLog2Slow;
-  VP8LExtraCost = ExtraCost;
-  VP8LExtraCostCombined = ExtraCostCombined;
-  VP8LGetEntropyUnrefined = GetEntropyUnrefined;
-  VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined;
-  VP8LHistogramAdd = HistogramAdd;
+  VP8LFastSLog2Slow = FastSLog2Slow_MIPS32;
+  VP8LFastLog2Slow = FastLog2Slow_MIPS32;
+  VP8LExtraCost = ExtraCost_MIPS32;
+  VP8LExtraCostCombined = ExtraCostCombined_MIPS32;
+  VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;
+  VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;
+  VP8LHistogramAdd = HistogramAdd_MIPS32;
 }
 
 #else  // !WEBP_USE_MIPS32
diff --git a/thirdparty/libwebp/dsp/lossless_enc_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/lossless_enc_mips_dsp_r2.c
index 0abf3c4f36..5855e6ae15 100644
--- a/thirdparty/libwebp/dsp/lossless_enc_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_mips_dsp_r2.c
@@ -12,14 +12,14 @@
 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS_DSP_R2)
 
-#include "./lossless.h"
+#include "src/dsp/lossless.h"
 
-static void SubtractGreenFromBlueAndRed(uint32_t* argb_data,
-                                        int num_pixels) {
+static void SubtractGreenFromBlueAndRed_MIPSdspR2(uint32_t* argb_data,
+                                                  int num_pixels) {
   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
   uint32_t* const p_loop1_end = argb_data + (num_pixels & ~3);
   uint32_t* const p_loop2_end = p_loop1_end + (num_pixels & 3);
@@ -78,8 +78,8 @@ static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
   return (uint32_t)((int)(color_pred) * color) >> 5;
 }
 
-static void TransformColor(const VP8LMultipliers* const m, uint32_t* data,
-                           int num_pixels) {
+static void TransformColor_MIPSdspR2(const VP8LMultipliers* const m,
+                                     uint32_t* data, int num_pixels) {
   int temp0, temp1, temp2, temp3, temp4, temp5;
   uint32_t argb, argb1, new_red, new_red1;
   const uint32_t G_to_R = m->green_to_red_;
@@ -171,10 +171,13 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
   return (new_blue & 0xff);
 }
 
-static void CollectColorBlueTransforms(const uint32_t* argb, int stride,
-                                       int tile_width, int tile_height,
-                                       int green_to_blue, int red_to_blue,
-                                       int histo[]) {
+static void CollectColorBlueTransforms_MIPSdspR2(const uint32_t* argb,
+                                                 int stride,
+                                                 int tile_width,
+                                                 int tile_height,
+                                                 int green_to_blue,
+                                                 int red_to_blue,
+                                                 int histo[]) {
   const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
   const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
   const uint32_t mask = 0xff00ffu;
@@ -222,9 +225,12 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
   return (new_red & 0xff);
 }
 
-static void CollectColorRedTransforms(const uint32_t* argb, int stride,
-                                      int tile_width, int tile_height,
-                                      int green_to_red, int histo[]) {
+static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb,
+                                                int stride,
+                                                int tile_width,
+                                                int tile_height,
+                                                int green_to_red,
+                                                int histo[]) {
   const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
   while (tile_height-- > 0) {
     int x;
@@ -262,10 +268,10 @@ static void CollectColorRedTransforms(const uint32_t* argb, int stride,
 extern void VP8LEncDspInitMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPSdspR2(void) {
-  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
-  VP8LTransformColor = TransformColor;
-  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
-  VP8LCollectColorRedTransforms = CollectColorRedTransforms;
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_MIPSdspR2;
+  VP8LTransformColor = TransformColor_MIPSdspR2;
+  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_MIPSdspR2;
+  VP8LCollectColorRedTransforms = CollectColorRedTransforms_MIPSdspR2;
 }
 
 #else  // !WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/dsp/lossless_enc_msa.c b/thirdparty/libwebp/src/dsp/lossless_enc_msa.c
index 2f69ba3bca..600dddfb59 100644
--- a/thirdparty/libwebp/dsp/lossless_enc_msa.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_msa.c
@@ -11,12 +11,12 @@
 //
 // Authors: Prashant Patil (Prashant.Patil@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MSA)
 
-#include "./lossless.h"
-#include "./msa_macro.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/msa_macro.h"
 
 #define TRANSFORM_COLOR_8(src0, src1, dst0, dst1, c0, c1, mask0, mask1) do {  \
   v8i16 g0, g1, t0, t1, t2, t3;                                               \
@@ -48,8 +48,8 @@
   dst = VSHF_UB(src, t0, mask1);                                \
 } while (0)
 
-static void TransformColor(const VP8LMultipliers* const m, uint32_t* data,
-                           int num_pixels) {
+static void TransformColor_MSA(const VP8LMultipliers* const m, uint32_t* data,
+                               int num_pixels) {
   v16u8 src0, dst0;
   const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
                                          (m->green_to_red_ << 16));
@@ -94,7 +94,8 @@ static void TransformColor(const VP8LMultipliers* const m, uint32_t* data,
   }
 }
 
-static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+static void SubtractGreenFromBlueAndRed_MSA(uint32_t* argb_data,
+                                            int num_pixels) {
   int i;
   uint8_t* ptemp_data = (uint8_t*)argb_data;
   v16u8 src0, dst0, tmp0;
@@ -136,8 +137,8 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
 extern void VP8LEncDspInitMSA(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMSA(void) {
-  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
-  VP8LTransformColor = TransformColor;
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_MSA;
+  VP8LTransformColor = TransformColor_MSA;
 }
 
 #else  // !WEBP_USE_MSA
diff --git a/thirdparty/libwebp/dsp/lossless_enc_neon.c b/thirdparty/libwebp/src/dsp/lossless_enc_neon.c
index 4c56f2594b..7c7b73f8b6 100644
--- a/thirdparty/libwebp/dsp/lossless_enc_neon.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_neon.c
@@ -11,14 +11,14 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_NEON)
 
 #include <arm_neon.h>
 
-#include "./lossless.h"
-#include "./neon.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/neon.h"
 
 //------------------------------------------------------------------------------
 // Subtract-Green Transform
@@ -36,8 +36,8 @@ static const uint8_t kGreenShuffle[16] = {
   1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255
 };
 
-static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
-                                             const uint8x16_t shuffle) {
+static WEBP_INLINE uint8x16_t DoGreenShuffle_NEON(const uint8x16_t argb,
+                                                  const uint8x16_t shuffle) {
   return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)),
                      vtbl1q_u8(argb, vget_high_u8(shuffle)));
 }
@@ -45,14 +45,15 @@ static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
 // 255 = byte will be zeroed
 static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255  };
 
-static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
-                                             const uint8x8_t shuffle) {
+static WEBP_INLINE uint8x16_t DoGreenShuffle_NEON(const uint8x16_t argb,
+                                                  const uint8x8_t shuffle) {
   return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
                      vtbl1_u8(vget_high_u8(argb), shuffle));
 }
 #endif  // USE_VTBLQ
 
-static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+static void SubtractGreenFromBlueAndRed_NEON(uint32_t* argb_data,
+                                             int num_pixels) {
   const uint32_t* const end = argb_data + (num_pixels & ~3);
 #ifdef USE_VTBLQ
   const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
@@ -61,7 +62,7 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
 #endif
   for (; argb_data < end; argb_data += 4) {
     const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
-    const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
+    const uint8x16_t greens = DoGreenShuffle_NEON(argb, shuffle);
     vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens));
   }
   // fallthrough and finish off with plain-C
@@ -71,8 +72,8 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
 //------------------------------------------------------------------------------
 // Color Transform
 
-static void TransformColor(const VP8LMultipliers* const m,
-                           uint32_t* argb_data, int num_pixels) {
+static void TransformColor_NEON(const VP8LMultipliers* const m,
+                                uint32_t* argb_data, int num_pixels) {
   // sign-extended multiplying constants, pre-shifted by 6.
 #define CST(X)  (((int16_t)(m->X << 8)) >> 6)
   const int16_t rb[8] = {
@@ -102,7 +103,7 @@ static void TransformColor(const VP8LMultipliers* const m,
   for (i = 0; i + 4 <= num_pixels; i += 4) {
     const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i));
     // 0 g 0 g
-    const uint8x16_t greens = DoGreenShuffle(in, shuffle);
+    const uint8x16_t greens = DoGreenShuffle_NEON(in, shuffle);
     // x dr  x db1
     const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb);
     // r 0   b   0
@@ -132,8 +133,8 @@ static void TransformColor(const VP8LMultipliers* const m,
 extern void VP8LEncDspInitNEON(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitNEON(void) {
-  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
-  VP8LTransformColor = TransformColor;
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_NEON;
+  VP8LTransformColor = TransformColor_NEON;
 }
 
 #else  // !WEBP_USE_NEON
diff --git a/thirdparty/libwebp/dsp/lossless_enc_sse2.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
index 8ad85d94d7..1eaf35ca8e 100644
--- a/thirdparty/libwebp/dsp/lossless_enc_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
@@ -11,22 +11,23 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE2)
 #include <assert.h>
 #include <emmintrin.h>
-#include "./lossless.h"
-#include "./common_sse2.h"
-#include "./lossless_common.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/common_sse2.h"
+#include "src/dsp/lossless_common.h"
 
 // For sign-extended multiplying constants, pre-shifted by 5:
-#define CST_5b(X)  (((int16_t)((uint16_t)X << 8)) >> 5)
+#define CST_5b(X)  (((int16_t)((uint16_t)(X) << 8)) >> 5)
 
 //------------------------------------------------------------------------------
 // Subtract-Green Transform
 
-static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data,
+                                             int num_pixels) {
   int i;
   for (i = 0; i + 4 <= num_pixels; i += 4) {
     const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
@@ -45,8 +46,8 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
 //------------------------------------------------------------------------------
 // Color Transform
 
-static void TransformColor(const VP8LMultipliers* const m,
-                           uint32_t* argb_data, int num_pixels) {
+static void TransformColor_SSE2(const VP8LMultipliers* const m,
+                                uint32_t* argb_data, int num_pixels) {
   const __m128i mults_rb = _mm_set_epi16(
       CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_),
       CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_),
@@ -80,10 +81,10 @@ static void TransformColor(const VP8LMultipliers* const m,
 
 //------------------------------------------------------------------------------
 #define SPAN 8
-static void CollectColorBlueTransforms(const uint32_t* argb, int stride,
-                                       int tile_width, int tile_height,
-                                       int green_to_blue, int red_to_blue,
-                                       int histo[]) {
+static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
+                                            int tile_width, int tile_height,
+                                            int green_to_blue, int red_to_blue,
+                                            int histo[]) {
   const __m128i mults_r = _mm_set_epi16(
       CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0,
       CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0);
@@ -131,9 +132,9 @@ static void CollectColorBlueTransforms(const uint32_t* argb, int stride,
   }
 }
 
-static void CollectColorRedTransforms(const uint32_t* argb, int stride,
-                                      int tile_width, int tile_height,
-                                      int green_to_red, int histo[]) {
+static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
+                                           int tile_width, int tile_height,
+                                           int green_to_red, int histo[]) {
   const __m128i mults_g = _mm_set_epi16(
       0, CST_5b(green_to_red), 0, CST_5b(green_to_red),
       0, CST_5b(green_to_red), 0, CST_5b(green_to_red));
@@ -177,8 +178,8 @@ static void CollectColorRedTransforms(const uint32_t* argb, int stride,
 //------------------------------------------------------------------------------
 
 #define LINE_SIZE 16    // 8 or 16
-static void AddVector(const uint32_t* a, const uint32_t* b, uint32_t* out,
-                      int size) {
+static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
+                           int size) {
   int i;
   assert(size % LINE_SIZE == 0);
   for (i = 0; i < size; i += LINE_SIZE) {
@@ -203,7 +204,7 @@ static void AddVector(const uint32_t* a, const uint32_t* b, uint32_t* out,
   }
 }
 
-static void AddVectorEq(const uint32_t* a, uint32_t* out, int size) {
+static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
   int i;
   assert(size % LINE_SIZE == 0);
   for (i = 0; i < size; i += LINE_SIZE) {
@@ -231,22 +232,22 @@ static void AddVectorEq(const uint32_t* a, uint32_t* out, int size) {
 
 // Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
 // that's ok since the histogram values are less than 1<<28 (max picture size).
-static void HistogramAdd(const VP8LHistogram* const a,
-                         const VP8LHistogram* const b,
-                         VP8LHistogram* const out) {
+static void HistogramAdd_SSE2(const VP8LHistogram* const a,
+                              const VP8LHistogram* const b,
+                              VP8LHistogram* const out) {
   int i;
   const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
   assert(a->palette_code_bits_ == b->palette_code_bits_);
   if (b != out) {
-    AddVector(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES);
-    AddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
-    AddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
-    AddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
+    AddVector_SSE2(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES);
+    AddVector_SSE2(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
+    AddVector_SSE2(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
+    AddVector_SSE2(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
   } else {
-    AddVectorEq(a->literal_, out->literal_, NUM_LITERAL_CODES);
-    AddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES);
-    AddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES);
-    AddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
+    AddVectorEq_SSE2(a->literal_, out->literal_, NUM_LITERAL_CODES);
+    AddVectorEq_SSE2(a->red_, out->red_, NUM_LITERAL_CODES);
+    AddVectorEq_SSE2(a->blue_, out->blue_, NUM_LITERAL_CODES);
+    AddVectorEq_SSE2(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
   }
   for (i = NUM_LITERAL_CODES; i < literal_size; ++i) {
     out->literal_[i] = a->literal_[i] + b->literal_[i];
@@ -261,9 +262,9 @@ static void HistogramAdd(const VP8LHistogram* const a,
 
 // Checks whether the X or Y contribution is worth computing and adding.
 // Used in loop unrolling.
-#define ANALYZE_X_OR_Y(x_or_y, j)                                   \
-  do {                                                              \
-    if (x_or_y[i + j] != 0) retval -= VP8LFastSLog2(x_or_y[i + j]); \
+#define ANALYZE_X_OR_Y(x_or_y, j)                                           \
+  do {                                                                      \
+    if ((x_or_y)[i + (j)] != 0) retval -= VP8LFastSLog2((x_or_y)[i + (j)]); \
   } while (0)
 
 // Checks whether the X + Y contribution is worth computing and adding.
@@ -276,7 +277,7 @@ static void HistogramAdd(const VP8LHistogram* const a,
     }                                  \
   } while (0)
 
-static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
+static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
   int i;
   double retval = 0.;
   int sumX, sumXY;
@@ -332,8 +333,8 @@ static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
 
 //------------------------------------------------------------------------------
 
-static int VectorMismatch(const uint32_t* const array1,
-                          const uint32_t* const array2, int length) {
+static int VectorMismatch_SSE2(const uint32_t* const array1,
+                               const uint32_t* const array2, int length) {
   int match_len;
 
   if (length >= 12) {
@@ -574,8 +575,8 @@ static void PredictorSub10_SSE2(const uint32_t* in, const uint32_t* upper,
 }
 
 // Predictor11: select.
-static void GetSumAbsDiff32(const __m128i* const A, const __m128i* const B,
-                            __m128i* const out) {
+static void GetSumAbsDiff32_SSE2(const __m128i* const A, const __m128i* const B,
+                                 __m128i* const out) {
   // We can unpack with any value on the upper 32 bits, provided it's the same
   // on both operands (to that their sum of abs diff is zero). Here we use *A.
   const __m128i A_lo = _mm_unpacklo_epi32(*A, *A);
@@ -596,8 +597,8 @@ static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
     const __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
     const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
     __m128i pa, pb;
-    GetSumAbsDiff32(&T, &TL, &pa);   // pa = sum |T-TL|
-    GetSumAbsDiff32(&L, &TL, &pb);   // pb = sum |L-TL|
+    GetSumAbsDiff32_SSE2(&T, &TL, &pa);   // pa = sum |T-TL|
+    GetSumAbsDiff32_SSE2(&L, &TL, &pb);   // pb = sum |L-TL|
     {
       const __m128i mask = _mm_cmpgt_epi32(pb, pa);
       const __m128i A = _mm_and_si128(mask, L);
@@ -677,13 +678,13 @@ static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
 extern void VP8LEncDspInitSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
-  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
-  VP8LTransformColor = TransformColor;
-  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
-  VP8LCollectColorRedTransforms = CollectColorRedTransforms;
-  VP8LHistogramAdd = HistogramAdd;
-  VP8LCombinedShannonEntropy = CombinedShannonEntropy;
-  VP8LVectorMismatch = VectorMismatch;
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE2;
+  VP8LTransformColor = TransformColor_SSE2;
+  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE2;
+  VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2;
+  VP8LHistogramAdd = HistogramAdd_SSE2;
+  VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2;
+  VP8LVectorMismatch = VectorMismatch_SSE2;
   VP8LBundleColorMap = BundleColorMap_SSE2;
 
   VP8LPredictorsSub[0] = PredictorSub0_SSE2;
diff --git a/thirdparty/libwebp/dsp/lossless_enc_sse41.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c
index 821057ccd4..3526a342d3 100644
--- a/thirdparty/libwebp/dsp/lossless_enc_sse41.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c
@@ -11,17 +11,18 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE41)
 #include <assert.h>
 #include <smmintrin.h>
-#include "./lossless.h"
+#include "src/dsp/lossless.h"
 
 //------------------------------------------------------------------------------
 // Subtract-Green Transform
 
-static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
+                                              int num_pixels) {
   int i;
   const __m128i kCstShuffle = _mm_set_epi8(-1, 13, -1, 13, -1, 9, -1, 9,
                                            -1,  5, -1,  5, -1, 1, -1, 1);
@@ -43,7 +44,7 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
 extern void VP8LEncDspInitSSE41(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
-  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
 }
 
 #else  // !WEBP_USE_SSE41
diff --git a/thirdparty/libwebp/dsp/lossless_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
index 2984ce8df7..9888854d57 100644
--- a/thirdparty/libwebp/dsp/lossless_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
@@ -12,12 +12,12 @@
 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS_DSP_R2)
 
-#include "./lossless.h"
-#include "./lossless_common.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
 
 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)                 \
 static void FUNC_NAME(const TYPE* src,                                         \
@@ -86,8 +86,8 @@ static void FUNC_NAME(const TYPE* src,                                         \
   }                                                                            \
 }
 
-MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
-MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)
+MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
+MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)
 
 #undef MAP_COLOR_FUNCS
 
@@ -188,48 +188,52 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
   return Average2(Average2(a0, a1), Average2(a2, a3));
 }
 
-static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) {
   return Average3(left, top[0], top[1]);
 }
 
-static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) {
   return Average2(left, top[-1]);
 }
 
-static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) {
   return Average2(left, top[0]);
 }
 
-static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) {
   (void)left;
   return Average2(top[-1], top[0]);
 }
 
-static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) {
   (void)left;
   return Average2(top[0], top[1]);
 }
 
-static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor10_MIPSdspR2(uint32_t left,
+                                      const uint32_t* const top) {
   return Average4(left, top[-1], top[0], top[1]);
 }
 
-static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor11_MIPSdspR2(uint32_t left,
+                                      const uint32_t* const top) {
   return Select(top[0], left, top[-1]);
 }
 
-static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor12_MIPSdspR2(uint32_t left,
+                                      const uint32_t* const top) {
   return ClampedAddSubtractFull(left, top[0], top[-1]);
 }
 
-static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor13_MIPSdspR2(uint32_t left,
+                                      const uint32_t* const top) {
   return ClampedAddSubtractHalf(left, top[0], top[-1]);
 }
 
 // Add green to blue and red channels (i.e. perform the inverse transform of
 // 'subtract green').
-static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
-                                 uint32_t* dst) {
+static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels,
+                                           uint32_t* dst) {
   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
   const uint32_t* const p_loop2_end = src + num_pixels;
@@ -285,9 +289,9 @@ static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
   );
 }
 
-static void TransformColorInverse(const VP8LMultipliers* const m,
-                                  const uint32_t* src, int num_pixels,
-                                  uint32_t* dst) {
+static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
+                                            const uint32_t* src, int num_pixels,
+                                            uint32_t* dst) {
   int temp0, temp1, temp2, temp3, temp4, temp5;
   uint32_t argb, argb1, new_red;
   const uint32_t G_to_R = m->green_to_red_;
@@ -356,8 +360,8 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
   if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
 }
 
-static void ConvertBGRAToRGB(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src,
+                                       int num_pixels, uint8_t* dst) {
   int temp0, temp1, temp2, temp3;
   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
   const uint32_t* const p_loop2_end = src + num_pixels;
@@ -408,8 +412,8 @@ static void ConvertBGRAToRGB(const uint32_t* src,
   );
 }
 
-static void ConvertBGRAToRGBA(const uint32_t* src,
-                              int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src,
+                                        int num_pixels, uint8_t* dst) {
   int temp0, temp1, temp2, temp3;
   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
   const uint32_t* const p_loop2_end = src + num_pixels;
@@ -458,8 +462,8 @@ static void ConvertBGRAToRGBA(const uint32_t* src,
   );
 }
 
-static void ConvertBGRAToRGBA4444(const uint32_t* src,
-                                  int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src,
+                                            int num_pixels, uint8_t* dst) {
   int temp0, temp1, temp2, temp3, temp4, temp5;
   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
   const uint32_t* const p_loop2_end = src + num_pixels;
@@ -492,7 +496,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
     "ins            %[temp3],    %[temp5],          16,   4    \n\t"
     "addiu          %[src],      %[src],            16         \n\t"
     "precr.qb.ph    %[temp3],    %[temp3],          %[temp2]   \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     "usw            %[temp1],    0(%[dst])                     \n\t"
     "usw            %[temp3],    4(%[dst])                     \n\t"
 #else
@@ -514,7 +518,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
     "ins            %[temp0],    %[temp5],          16,   4    \n\t"
     "addiu          %[src],      %[src],            4          \n\t"
     "precr.qb.ph    %[temp0],    %[temp0],          %[temp0]   \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     "ush            %[temp0],    0(%[dst])                     \n\t"
 #else
     "wsbh           %[temp0],    %[temp0]                      \n\t"
@@ -532,8 +536,8 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
   );
 }
 
-static void ConvertBGRAToRGB565(const uint32_t* src,
-                                int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src,
+                                          int num_pixels, uint8_t* dst) {
   int temp0, temp1, temp2, temp3, temp4, temp5;
   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
   const uint32_t* const p_loop2_end = src + num_pixels;
@@ -570,7 +574,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
     "ins            %[temp2],    %[temp3],          0,    5    \n\t"
     "addiu          %[src],      %[src],            16         \n\t"
     "append         %[temp2],    %[temp1],          16         \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     "usw            %[temp0],    0(%[dst])                     \n\t"
     "usw            %[temp2],    4(%[dst])                     \n\t"
 #else
@@ -592,7 +596,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
     "ins            %[temp4],    %[temp5],          0,    11   \n\t"
     "addiu          %[src],      %[src],            4          \n\t"
     "ins            %[temp4],    %[temp0],          0,    5    \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     "ush            %[temp4],    0(%[dst])                     \n\t"
 #else
     "wsbh           %[temp4],    %[temp4]                      \n\t"
@@ -610,8 +614,8 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
   );
 }
 
-static void ConvertBGRAToBGR(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src,
+                                       int num_pixels, uint8_t* dst) {
   int temp0, temp1, temp2, temp3;
   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
   const uint32_t* const p_loop2_end = src + num_pixels;
@@ -662,24 +666,27 @@ static void ConvertBGRAToBGR(const uint32_t* src,
 extern void VP8LDspInitMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
-  VP8LMapColor32b = MapARGB;
-  VP8LMapColor8b = MapAlpha;
-  VP8LPredictors[5] = Predictor5;
-  VP8LPredictors[6] = Predictor6;
-  VP8LPredictors[7] = Predictor7;
-  VP8LPredictors[8] = Predictor8;
-  VP8LPredictors[9] = Predictor9;
-  VP8LPredictors[10] = Predictor10;
-  VP8LPredictors[11] = Predictor11;
-  VP8LPredictors[12] = Predictor12;
-  VP8LPredictors[13] = Predictor13;
-  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
-  VP8LTransformColorInverse = TransformColorInverse;
-  VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
-  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
-  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
-  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
-  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
+  VP8LMapColor32b = MapARGB_MIPSdspR2;
+  VP8LMapColor8b = MapAlpha_MIPSdspR2;
+
+  VP8LPredictors[5] = Predictor5_MIPSdspR2;
+  VP8LPredictors[6] = Predictor6_MIPSdspR2;
+  VP8LPredictors[7] = Predictor7_MIPSdspR2;
+  VP8LPredictors[8] = Predictor8_MIPSdspR2;
+  VP8LPredictors[9] = Predictor9_MIPSdspR2;
+  VP8LPredictors[10] = Predictor10_MIPSdspR2;
+  VP8LPredictors[11] = Predictor11_MIPSdspR2;
+  VP8LPredictors[12] = Predictor12_MIPSdspR2;
+  VP8LPredictors[13] = Predictor13_MIPSdspR2;
+
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2;
+  VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2;
+
+  VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2;
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2;
+  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2;
+  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2;
+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2;
 }
 
 #else  // !WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/dsp/lossless_msa.c b/thirdparty/libwebp/src/dsp/lossless_msa.c
index f6dd5649ac..9f5472078d 100644
--- a/thirdparty/libwebp/dsp/lossless_msa.c
+++ b/thirdparty/libwebp/src/dsp/lossless_msa.c
@@ -11,12 +11,12 @@
 //
 // Author: Prashant Patil (prashant.patil@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MSA)
 
-#include "./lossless.h"
-#include "./msa_macro.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/msa_macro.h"
 
 //------------------------------------------------------------------------------
 // Colorspace conversion functions
@@ -43,7 +43,7 @@
 
 #define CONVERT8_BGRA_XXX(psrc, pdst, m0, m1) do {         \
   uint64_t pix_d;                                          \
-  v16u8 src0, src1, src2, dst0, dst1;                      \
+  v16u8 src0, src1, src2 = { 0 }, dst0, dst1;              \
   LD_UB2(psrc, 16, src0, src1);                            \
   VSHF_B2_UB(src0, src1, src1, src2, m0, m1, dst0, dst1);  \
   ST_UB(dst0, pdst);                                       \
@@ -109,8 +109,8 @@
   dst = VSHF_UB(src, t0, mask1);                                        \
 } while (0)
 
-static void ConvertBGRAToRGBA(const uint32_t* src,
-                              int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGBA_MSA(const uint32_t* src,
+                                  int num_pixels, uint8_t* dst) {
   int i;
   const uint8_t* ptemp_src = (const uint8_t*)src;
   uint8_t* ptemp_dst = (uint8_t*)dst;
@@ -150,8 +150,8 @@ static void ConvertBGRAToRGBA(const uint32_t* src,
   }
 }
 
-static void ConvertBGRAToBGR(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToBGR_MSA(const uint32_t* src,
+                                 int num_pixels, uint8_t* dst) {
   const uint8_t* ptemp_src = (const uint8_t*)src;
   uint8_t* ptemp_dst = (uint8_t*)dst;
   const v16u8 mask0 = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,
@@ -197,8 +197,8 @@ static void ConvertBGRAToBGR(const uint32_t* src,
   }
 }
 
-static void ConvertBGRAToRGB(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGB_MSA(const uint32_t* src,
+                                 int num_pixels, uint8_t* dst) {
   const uint8_t* ptemp_src = (const uint8_t*)src;
   uint8_t* ptemp_dst = (uint8_t*)dst;
   const v16u8 mask0 = { 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12,
@@ -244,8 +244,8 @@ static void ConvertBGRAToRGB(const uint32_t* src,
   }
 }
 
-static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
-                                 uint32_t* dst) {
+static void AddGreenToBlueAndRed_MSA(const uint32_t* const src, int num_pixels,
+                                     uint32_t* dst) {
   int i;
   const uint8_t* in = (const uint8_t*)src;
   uint8_t* out = (uint8_t*)dst;
@@ -286,9 +286,9 @@ static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
   }
 }
 
-static void TransformColorInverse(const VP8LMultipliers* const m,
-                                  const uint32_t* src, int num_pixels,
-                                  uint32_t* dst) {
+static void TransformColorInverse_MSA(const VP8LMultipliers* const m,
+                                      const uint32_t* src, int num_pixels,
+                                      uint32_t* dst) {
   v16u8 src0, dst0;
   const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
                                          (m->green_to_red_ << 16));
@@ -341,11 +341,12 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
 extern void VP8LDspInitMSA(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMSA(void) {
-  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
-  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
-  VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
-  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
-  VP8LTransformColorInverse = TransformColorInverse;
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MSA;
+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MSA;
+  VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MSA;
+
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MSA;
+  VP8LTransformColorInverse = TransformColorInverse_MSA;
 }
 
 #else  // !WEBP_USE_MSA
diff --git a/thirdparty/libwebp/dsp/lossless_neon.c b/thirdparty/libwebp/src/dsp/lossless_neon.c
index 1145d5fad0..76a1b6f873 100644
--- a/thirdparty/libwebp/dsp/lossless_neon.c
+++ b/thirdparty/libwebp/src/dsp/lossless_neon.c
@@ -11,14 +11,14 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_NEON)
 
 #include <arm_neon.h>
 
-#include "./lossless.h"
-#include "./neon.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/neon.h"
 
 //------------------------------------------------------------------------------
 // Colorspace conversion functions
@@ -26,8 +26,8 @@
 #if !defined(WORK_AROUND_GCC)
 // gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for
 // gcc-4.8.x at least.
-static void ConvertBGRAToRGBA(const uint32_t* src,
-                              int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
+                                   int num_pixels, uint8_t* dst) {
   const uint32_t* const end = src + (num_pixels & ~15);
   for (; src < end; src += 16) {
     uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
@@ -41,8 +41,8 @@ static void ConvertBGRAToRGBA(const uint32_t* src,
   VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst);  // left-overs
 }
 
-static void ConvertBGRAToBGR(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToBGR_NEON(const uint32_t* src,
+                                  int num_pixels, uint8_t* dst) {
   const uint32_t* const end = src + (num_pixels & ~15);
   for (; src < end; src += 16) {
     const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
@@ -53,8 +53,8 @@ static void ConvertBGRAToBGR(const uint32_t* src,
   VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst);  // left-overs
 }
 
-static void ConvertBGRAToRGB(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGB_NEON(const uint32_t* src,
+                                  int num_pixels, uint8_t* dst) {
   const uint32_t* const end = src + (num_pixels & ~15);
   for (; src < end; src += 16) {
     const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
@@ -71,8 +71,8 @@ static void ConvertBGRAToRGB(const uint32_t* src,
 
 static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 };
 
-static void ConvertBGRAToRGBA(const uint32_t* src,
-                              int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
+                                   int num_pixels, uint8_t* dst) {
   const uint32_t* const end = src + (num_pixels & ~1);
   const uint8x8_t shuffle = vld1_u8(kRGBAShuffle);
   for (; src < end; src += 2) {
@@ -89,8 +89,8 @@ static const uint8_t kBGRShuffle[3][8] = {
   { 21, 22, 24, 25, 26, 28, 29, 30 }
 };
 
-static void ConvertBGRAToBGR(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToBGR_NEON(const uint32_t* src,
+                                  int num_pixels, uint8_t* dst) {
   const uint32_t* const end = src + (num_pixels & ~7);
   const uint8x8_t shuffle0 = vld1_u8(kBGRShuffle[0]);
   const uint8x8_t shuffle1 = vld1_u8(kBGRShuffle[1]);
@@ -116,8 +116,8 @@ static const uint8_t kRGBShuffle[3][8] = {
   { 21, 20, 26, 25, 24, 30, 29, 28 }
 };
 
-static void ConvertBGRAToRGB(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGB_NEON(const uint32_t* src,
+                                  int num_pixels, uint8_t* dst) {
   const uint32_t* const end = src + (num_pixels & ~7);
   const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]);
   const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]);
@@ -139,7 +139,6 @@ static void ConvertBGRAToRGB(const uint32_t* src,
 
 #endif   // !WORK_AROUND_GCC
 
-
 //------------------------------------------------------------------------------
 // Predictor Transform
 
@@ -506,8 +505,8 @@ static const uint8_t kGreenShuffle[16] = {
   1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255
 };
 
-static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
-                                             const uint8x16_t shuffle) {
+static WEBP_INLINE uint8x16_t DoGreenShuffle_NEON(const uint8x16_t argb,
+                                                  const uint8x16_t shuffle) {
   return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)),
                      vtbl1q_u8(argb, vget_high_u8(shuffle)));
 }
@@ -515,15 +514,15 @@ static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
 // 255 = byte will be zeroed
 static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255  };
 
-static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
-                                             const uint8x8_t shuffle) {
+static WEBP_INLINE uint8x16_t DoGreenShuffle_NEON(const uint8x16_t argb,
+                                                  const uint8x8_t shuffle) {
   return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
                      vtbl1_u8(vget_high_u8(argb), shuffle));
 }
 #endif  // USE_VTBLQ
 
-static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
-                                 uint32_t* dst) {
+static void AddGreenToBlueAndRed_NEON(const uint32_t* src, int num_pixels,
+                                      uint32_t* dst) {
   const uint32_t* const end = src + (num_pixels & ~3);
 #ifdef USE_VTBLQ
   const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
@@ -532,7 +531,7 @@ static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
 #endif
   for (; src < end; src += 4, dst += 4) {
     const uint8x16_t argb = vld1q_u8((const uint8_t*)src);
-    const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
+    const uint8x16_t greens = DoGreenShuffle_NEON(argb, shuffle);
     vst1q_u8((uint8_t*)dst, vaddq_u8(argb, greens));
   }
   // fallthrough and finish off with plain-C
@@ -542,9 +541,9 @@ static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
 //------------------------------------------------------------------------------
 // Color Transform
 
-static void TransformColorInverse(const VP8LMultipliers* const m,
-                                  const uint32_t* const src, int num_pixels,
-                                  uint32_t* dst) {
+static void TransformColorInverse_NEON(const VP8LMultipliers* const m,
+                                       const uint32_t* const src,
+                                       int num_pixels, uint32_t* dst) {
 // sign-extended multiplying constants, pre-shifted by 6.
 #define CST(X)  (((int16_t)(m->X << 8)) >> 6)
   const int16_t rb[8] = {
@@ -575,7 +574,7 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
     const uint8x16_t in = vld1q_u8((const uint8_t*)(src + i));
     const uint32x4_t a0g0 = vandq_u32(vreinterpretq_u32_u8(in), mask_ag);
     // 0 g 0 g
-    const uint8x16_t greens = DoGreenShuffle(in, shuffle);
+    const uint8x16_t greens = DoGreenShuffle_NEON(in, shuffle);
     // x dr  x db1
     const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb);
     // x r'  x   b'
@@ -627,12 +626,12 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitNEON(void) {
   VP8LPredictorsAdd[12] = PredictorAdd12_NEON;
   VP8LPredictorsAdd[13] = PredictorAdd13_NEON;
 
-  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
-  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
-  VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_NEON;
+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_NEON;
+  VP8LConvertBGRAToRGB = ConvertBGRAToRGB_NEON;
 
-  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
-  VP8LTransformColorInverse = TransformColorInverse;
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_NEON;
+  VP8LTransformColorInverse = TransformColorInverse_NEON;
 }
 
 #else  // !WEBP_USE_NEON
diff --git a/thirdparty/libwebp/dsp/lossless_sse2.c b/thirdparty/libwebp/src/dsp/lossless_sse2.c
index 15aae93869..653b466cd6 100644
--- a/thirdparty/libwebp/dsp/lossless_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_sse2.c
@@ -11,21 +11,22 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE2)
 
-#include "./common_sse2.h"
-#include "./lossless.h"
-#include "./lossless_common.h"
+#include "src/dsp/common_sse2.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
 #include <assert.h>
 #include <emmintrin.h>
 
 //------------------------------------------------------------------------------
 // Predictor Transform
 
-static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
-                                                   uint32_t c2) {
+static WEBP_INLINE uint32_t ClampedAddSubtractFull_SSE2(uint32_t c0,
+                                                        uint32_t c1,
+                                                        uint32_t c2) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
   const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
@@ -37,8 +38,9 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
   return output;
 }
 
-static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
-                                                   uint32_t c2) {
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
+                                                        uint32_t c1,
+                                                        uint32_t c2) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
   const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
@@ -55,7 +57,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
   return output;
 }
 
-static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
+static WEBP_INLINE uint32_t Select_SSE2(uint32_t a, uint32_t b, uint32_t c) {
   int pa_minus_pb;
   const __m128i zero = _mm_setzero_si128();
   const __m128i A0 = _mm_cvtsi32_si128(a);
@@ -88,8 +90,9 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,
   *avg = _mm_sub_epi8(avg1, one);
 }
 
-static WEBP_INLINE void Average2_uint32(const uint32_t a0, const uint32_t a1,
-                                        __m128i* const avg) {
+static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0,
+                                             const uint32_t a1,
+                                             __m128i* const avg) {
   // (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1)
   const __m128i ones = _mm_set1_epi8(1);
   const __m128i A0 = _mm_cvtsi32_si128(a0);
@@ -99,7 +102,7 @@ static WEBP_INLINE void Average2_uint32(const uint32_t a0, const uint32_t a1,
   *avg = _mm_sub_epi8(avg1, one);
 }
 
-static WEBP_INLINE __m128i Average2_uint32_16(uint32_t a0, uint32_t a1) {
+static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), zero);
   const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
@@ -107,15 +110,16 @@ static WEBP_INLINE __m128i Average2_uint32_16(uint32_t a0, uint32_t a1) {
   return _mm_srli_epi16(sum, 1);
 }
 
-static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
+static WEBP_INLINE uint32_t Average2_SSE2(uint32_t a0, uint32_t a1) {
   __m128i output;
-  Average2_uint32(a0, a1, &output);
+  Average2_uint32_SSE2(a0, a1, &output);
   return _mm_cvtsi128_si32(output);
 }
 
-static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
+static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1,
+                                          uint32_t a2) {
   const __m128i zero = _mm_setzero_si128();
-  const __m128i avg1 = Average2_uint32_16(a0, a2);
+  const __m128i avg1 = Average2_uint32_16_SSE2(a0, a2);
   const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
   const __m128i sum = _mm_add_epi16(avg1, A1);
   const __m128i avg2 = _mm_srli_epi16(sum, 1);
@@ -124,10 +128,10 @@ static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
   return output;
 }
 
-static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
-                                     uint32_t a2, uint32_t a3) {
-  const __m128i avg1 = Average2_uint32_16(a0, a1);
-  const __m128i avg2 = Average2_uint32_16(a2, a3);
+static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
+                                          uint32_t a2, uint32_t a3) {
+  const __m128i avg1 = Average2_uint32_16_SSE2(a0, a1);
+  const __m128i avg2 = Average2_uint32_16_SSE2(a2, a3);
   const __m128i sum = _mm_add_epi16(avg2, avg1);
   const __m128i avg3 = _mm_srli_epi16(sum, 1);
   const __m128i A0 = _mm_packus_epi16(avg3, avg3);
@@ -136,41 +140,41 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
 }
 
 static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average3(left, top[0], top[1]);
+  const uint32_t pred = Average3_SSE2(left, top[0], top[1]);
   return pred;
 }
 static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average2(left, top[-1]);
+  const uint32_t pred = Average2_SSE2(left, top[-1]);
   return pred;
 }
 static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average2(left, top[0]);
+  const uint32_t pred = Average2_SSE2(left, top[0]);
   return pred;
 }
 static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average2(top[-1], top[0]);
+  const uint32_t pred = Average2_SSE2(top[-1], top[0]);
   (void)left;
   return pred;
 }
 static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average2(top[0], top[1]);
+  const uint32_t pred = Average2_SSE2(top[0], top[1]);
   (void)left;
   return pred;
 }
 static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
+  const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]);
   return pred;
 }
 static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Select(top[0], left, top[-1]);
+  const uint32_t pred = Select_SSE2(top[0], left, top[-1]);
   return pred;
 }
 static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
+  const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]);
   return pred;
 }
 static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
+  const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]);
   return pred;
 }
 
@@ -272,9 +276,24 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
 #undef GENERATE_PREDICTOR_2
 
 // Predictor10: average of (average of (L,TL), average of (T, TR)).
+#define DO_PRED10(OUT) do {               \
+  __m128i avgLTL, avg;                    \
+  Average2_m128i(&L, &TL, &avgLTL);       \
+  Average2_m128i(&avgTTR, &avgLTL, &avg); \
+  L = _mm_add_epi8(avg, src);             \
+  out[i + (OUT)] = _mm_cvtsi128_si32(L);  \
+} while (0)
+
+#define DO_PRED10_SHIFT do {                                  \
+  /* Rotate the pre-computed values for the next iteration.*/ \
+  avgTTR = _mm_srli_si128(avgTTR, 4);                         \
+  TL = _mm_srli_si128(TL, 4);                                 \
+  src = _mm_srli_si128(src, 4);                               \
+} while (0)
+
 static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
                                 int num_pixels, uint32_t* out) {
-  int i, j;
+  int i;
   __m128i L = _mm_cvtsi32_si128(out[-1]);
   for (i = 0; i + 4 <= num_pixels; i += 4) {
     __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
@@ -283,79 +302,90 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
     const __m128i TR = _mm_loadu_si128((const __m128i*)&upper[i + 1]);
     __m128i avgTTR;
     Average2_m128i(&T, &TR, &avgTTR);
-    for (j = 0; j < 4; ++j) {
-      __m128i avgLTL, avg;
-      Average2_m128i(&L, &TL, &avgLTL);
-      Average2_m128i(&avgTTR, &avgLTL, &avg);
-      L = _mm_add_epi8(avg, src);
-      out[i + j] = _mm_cvtsi128_si32(L);
-      // Rotate the pre-computed values for the next iteration.
-      avgTTR = _mm_srli_si128(avgTTR, 4);
-      TL = _mm_srli_si128(TL, 4);
-      src = _mm_srli_si128(src, 4);
-    }
+    DO_PRED10(0);
+    DO_PRED10_SHIFT;
+    DO_PRED10(1);
+    DO_PRED10_SHIFT;
+    DO_PRED10(2);
+    DO_PRED10_SHIFT;
+    DO_PRED10(3);
   }
   if (i != num_pixels) {
     VP8LPredictorsAdd_C[10](in + i, upper + i, num_pixels - i, out + i);
   }
 }
+#undef DO_PRED10
+#undef DO_PRED10_SHIFT
 
 // Predictor11: select.
-static void GetSumAbsDiff32(const __m128i* const A, const __m128i* const B,
-                            __m128i* const out) {
-  // We can unpack with any value on the upper 32 bits, provided it's the same
-  // on both operands (to that their sum of abs diff is zero). Here we use *A.
-  const __m128i A_lo = _mm_unpacklo_epi32(*A, *A);
-  const __m128i B_lo = _mm_unpacklo_epi32(*B, *A);
-  const __m128i A_hi = _mm_unpackhi_epi32(*A, *A);
-  const __m128i B_hi = _mm_unpackhi_epi32(*B, *A);
-  const __m128i s_lo = _mm_sad_epu8(A_lo, B_lo);
-  const __m128i s_hi = _mm_sad_epu8(A_hi, B_hi);
-  *out = _mm_packs_epi32(s_lo, s_hi);
-}
+#define DO_PRED11(OUT) do {                                            \
+  const __m128i L_lo = _mm_unpacklo_epi32(L, T);                       \
+  const __m128i TL_lo = _mm_unpacklo_epi32(TL, T);                     \
+  const __m128i pb = _mm_sad_epu8(L_lo, TL_lo); /* pb = sum |L-TL|*/   \
+  const __m128i mask = _mm_cmpgt_epi32(pb, pa);                        \
+  const __m128i A = _mm_and_si128(mask, L);                            \
+  const __m128i B = _mm_andnot_si128(mask, T);                         \
+  const __m128i pred = _mm_or_si128(A, B); /* pred = pb > pa ? L : T*/ \
+  L = _mm_add_epi8(src, pred);                                         \
+  out[i + (OUT)] = _mm_cvtsi128_si32(L);                               \
+} while (0)
+
+#define DO_PRED11_SHIFT do {                                \
+  /* Shift the pre-computed value for the next iteration.*/ \
+  T = _mm_srli_si128(T, 4);                                 \
+  TL = _mm_srli_si128(TL, 4);                               \
+  src = _mm_srli_si128(src, 4);                             \
+  pa = _mm_srli_si128(pa, 4);                               \
+} while (0)
 
 static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
                                 int num_pixels, uint32_t* out) {
-  int i, j;
+  int i;
+  __m128i pa;
   __m128i L = _mm_cvtsi32_si128(out[-1]);
   for (i = 0; i + 4 <= num_pixels; i += 4) {
     __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
     __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
     __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
-    __m128i pa;
-    GetSumAbsDiff32(&T, &TL, &pa);   // pa = sum |T-TL|
-    for (j = 0; j < 4; ++j) {
-      const __m128i L_lo = _mm_unpacklo_epi32(L, L);
-      const __m128i TL_lo = _mm_unpacklo_epi32(TL, L);
-      const __m128i pb = _mm_sad_epu8(L_lo, TL_lo);  // pb = sum |L-TL|
-      const __m128i mask = _mm_cmpgt_epi32(pb, pa);
-      const __m128i A = _mm_and_si128(mask, L);
-      const __m128i B = _mm_andnot_si128(mask, T);
-      const __m128i pred = _mm_or_si128(A, B);    // pred = (L > T)? L : T
-      L = _mm_add_epi8(src, pred);
-      out[i + j] = _mm_cvtsi128_si32(L);
-      // Shift the pre-computed value for the next iteration.
-      T = _mm_srli_si128(T, 4);
-      TL = _mm_srli_si128(TL, 4);
-      src = _mm_srli_si128(src, 4);
-      pa = _mm_srli_si128(pa, 4);
+    {
+      // We can unpack with any value on the upper 32 bits, provided it's the
+      // same on both operands (so that their sum of abs diff is zero). Here we
+      // use T.
+      const __m128i T_lo = _mm_unpacklo_epi32(T, T);
+      const __m128i TL_lo = _mm_unpacklo_epi32(TL, T);
+      const __m128i T_hi = _mm_unpackhi_epi32(T, T);
+      const __m128i TL_hi = _mm_unpackhi_epi32(TL, T);
+      const __m128i s_lo = _mm_sad_epu8(T_lo, TL_lo);
+      const __m128i s_hi = _mm_sad_epu8(T_hi, TL_hi);
+      pa = _mm_packs_epi32(s_lo, s_hi);  // pa = sum |T-TL|
     }
+    DO_PRED11(0);
+    DO_PRED11_SHIFT;
+    DO_PRED11(1);
+    DO_PRED11_SHIFT;
+    DO_PRED11(2);
+    DO_PRED11_SHIFT;
+    DO_PRED11(3);
   }
   if (i != num_pixels) {
     VP8LPredictorsAdd_C[11](in + i, upper + i, num_pixels - i, out + i);
   }
 }
+#undef DO_PRED11
+#undef DO_PRED11_SHIFT
 
 // Predictor12: ClampedAddSubtractFull.
-#define DO_PRED12(DIFF, LANE, OUT)                          \
-do {                                                        \
-  const __m128i all = _mm_add_epi16(L, (DIFF));             \
-  const __m128i alls = _mm_packus_epi16(all, all);          \
-  const __m128i res = _mm_add_epi8(src, alls);              \
-  out[i + (OUT)] = _mm_cvtsi128_si32(res);                  \
-  L = _mm_unpacklo_epi8(res, zero);                         \
+#define DO_PRED12(DIFF, LANE, OUT) do {            \
+  const __m128i all = _mm_add_epi16(L, (DIFF));    \
+  const __m128i alls = _mm_packus_epi16(all, all); \
+  const __m128i res = _mm_add_epi8(src, alls);     \
+  out[i + (OUT)] = _mm_cvtsi128_si32(res);         \
+  L = _mm_unpacklo_epi8(res, zero);                \
+} while (0)
+
+#define DO_PRED12_SHIFT(DIFF, LANE) do {                    \
   /* Shift the pre-computed value for the next iteration.*/ \
-  if (LANE == 0) (DIFF) = _mm_srli_si128((DIFF), 8);        \
+  if ((LANE) == 0) (DIFF) = _mm_srli_si128((DIFF), 8);      \
   src = _mm_srli_si128(src, 4);                             \
 } while (0)
 
@@ -377,8 +407,11 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
     __m128i diff_lo = _mm_sub_epi16(T_lo, TL_lo);
     __m128i diff_hi = _mm_sub_epi16(T_hi, TL_hi);
     DO_PRED12(diff_lo, 0, 0);
+    DO_PRED12_SHIFT(diff_lo, 0);
     DO_PRED12(diff_lo, 1, 1);
+    DO_PRED12_SHIFT(diff_lo, 1);
     DO_PRED12(diff_hi, 0, 2);
+    DO_PRED12_SHIFT(diff_hi, 0);
     DO_PRED12(diff_hi, 1, 3);
   }
   if (i != num_pixels) {
@@ -386,6 +419,7 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
   }
 }
 #undef DO_PRED12
+#undef DO_PRED12_SHIFT
 
 // Due to averages with integers, values cannot be accumulated in parallel for
 // predictors 13.
@@ -394,8 +428,8 @@ GENERATE_PREDICTOR_ADD(Predictor13_SSE2, PredictorAdd13_SSE2)
 //------------------------------------------------------------------------------
 // Subtract-Green Transform
 
-static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
-                                 uint32_t* dst) {
+static void AddGreenToBlueAndRed_SSE2(const uint32_t* const src, int num_pixels,
+                                      uint32_t* dst) {
   int i;
   for (i = 0; i + 4 <= num_pixels; i += 4) {
     const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb
@@ -414,9 +448,9 @@ static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
 //------------------------------------------------------------------------------
 // Color Transform
 
-static void TransformColorInverse(const VP8LMultipliers* const m,
-                                  const uint32_t* const src, int num_pixels,
-                                  uint32_t* dst) {
+static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
+                                       const uint32_t* const src,
+                                       int num_pixels, uint32_t* dst) {
 // sign-extended multiplying constants, pre-shifted by 5.
 #define CST(X)  (((int16_t)(m->X << 8)) >> 5)   // sign-extend
   const __m128i mults_rb = _mm_set_epi16(
@@ -454,8 +488,8 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
 //------------------------------------------------------------------------------
 // Color-space conversion functions
 
-static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels,
-                             uint8_t* dst) {
+static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
+                                  uint8_t* dst) {
   const __m128i* in = (const __m128i*)src;
   __m128i* out = (__m128i*)dst;
 
@@ -490,27 +524,26 @@ static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels,
   }
 }
 
-static void ConvertBGRAToRGBA(const uint32_t* src,
-                              int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
+                                   int num_pixels, uint8_t* dst) {
+  const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
   const __m128i* in = (const __m128i*)src;
   __m128i* out = (__m128i*)dst;
   while (num_pixels >= 8) {
-    const __m128i bgra0 = _mm_loadu_si128(in++);     // bgra0|bgra1|bgra2|bgra3
-    const __m128i bgra4 = _mm_loadu_si128(in++);     // bgra4|bgra5|bgra6|bgra7
-    const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4);  // b0b4g0g4r0r4a0a4...
-    const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4);  // b2b6g2g6r2r6a2a6...
-    const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);   // b0b2b4b6g0g2g4g6...
-    const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);   // b1b3b5b7g1g3g5g7...
-    const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);   // b0...b7 | g0...g7
-    const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);   // r0...r7 | a0...a7
-    const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);  // g0...g7 | a0...a7
-    const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);  // r0...r7 | b0...b7
-    const __m128i rg0 = _mm_unpacklo_epi8(rb0, ga0);   // r0g0r1g1 ... r6g6r7g7
-    const __m128i ba0 = _mm_unpackhi_epi8(rb0, ga0);   // b0a0b1a1 ... b6a6b7a7
-    const __m128i rgba0 = _mm_unpacklo_epi16(rg0, ba0);  // rgba0|rgba1...
-    const __m128i rgba4 = _mm_unpackhi_epi16(rg0, ba0);  // rgba4|rgba5...
-    _mm_storeu_si128(out++, rgba0);
-    _mm_storeu_si128(out++, rgba4);
+    const __m128i A1 = _mm_loadu_si128(in++);
+    const __m128i A2 = _mm_loadu_si128(in++);
+    const __m128i B1 = _mm_and_si128(A1, red_blue_mask);     // R 0 B 0
+    const __m128i B2 = _mm_and_si128(A2, red_blue_mask);     // R 0 B 0
+    const __m128i C1 = _mm_andnot_si128(red_blue_mask, A1);  // 0 G 0 A
+    const __m128i C2 = _mm_andnot_si128(red_blue_mask, A2);  // 0 G 0 A
+    const __m128i D1 = _mm_shufflelo_epi16(B1, _MM_SHUFFLE(2, 3, 0, 1));
+    const __m128i D2 = _mm_shufflelo_epi16(B2, _MM_SHUFFLE(2, 3, 0, 1));
+    const __m128i E1 = _mm_shufflehi_epi16(D1, _MM_SHUFFLE(2, 3, 0, 1));
+    const __m128i E2 = _mm_shufflehi_epi16(D2, _MM_SHUFFLE(2, 3, 0, 1));
+    const __m128i F1 = _mm_or_si128(E1, C1);
+    const __m128i F2 = _mm_or_si128(E2, C2);
+    _mm_storeu_si128(out++, F1);
+    _mm_storeu_si128(out++, F2);
     num_pixels -= 8;
   }
   // left-overs
@@ -519,8 +552,8 @@ static void ConvertBGRAToRGBA(const uint32_t* src,
   }
 }
 
-static void ConvertBGRAToRGBA4444(const uint32_t* src,
-                                  int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
+                                       int num_pixels, uint8_t* dst) {
   const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
   const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
   const __m128i* in = (const __m128i*)src;
@@ -541,7 +574,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
     const __m128i ga2 = _mm_and_si128(ga1, mask_0x0f);  // g0-|g1-|...|a6-|a7-
     const __m128i rgba0 = _mm_or_si128(ga2, rb1);       // rg0..rg7 | ba0..ba7
     const __m128i rgba1 = _mm_srli_si128(rgba0, 8);     // ba0..ba7 | 0
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     const __m128i rgba = _mm_unpacklo_epi8(rgba1, rgba0);  // barg0...barg7
 #else
     const __m128i rgba = _mm_unpacklo_epi8(rgba0, rgba1);  // rgba0...rgba7
@@ -555,8 +588,8 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
   }
 }
 
-static void ConvertBGRAToRGB565(const uint32_t* src,
-                                int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
+                                     int num_pixels, uint8_t* dst) {
   const __m128i mask_0xe0 = _mm_set1_epi8(0xe0);
   const __m128i mask_0xf8 = _mm_set1_epi8(0xf8);
   const __m128i mask_0x07 = _mm_set1_epi8(0x07);
@@ -582,7 +615,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
     const __m128i rg1 = _mm_or_si128(rb1, g_lo2);           // gr0...gr7|xx
     const __m128i b1 = _mm_srli_epi16(b0, 3);
     const __m128i gb1 = _mm_or_si128(b1, g_hi2);            // bg0...bg7|xx
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     const __m128i rgba = _mm_unpacklo_epi8(gb1, rg1);     // rggb0...rggb7
 #else
     const __m128i rgba = _mm_unpacklo_epi8(rg1, gb1);     // bgrb0...bgrb7
@@ -596,8 +629,8 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
   }
 }
 
-static void ConvertBGRAToBGR(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+static void ConvertBGRAToBGR_SSE2(const uint32_t* src,
+                                  int num_pixels, uint8_t* dst) {
   const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff);
   const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0);
   const __m128i* in = (const __m128i*)src;
@@ -660,14 +693,14 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE2(void) {
   VP8LPredictorsAdd[12] = PredictorAdd12_SSE2;
   VP8LPredictorsAdd[13] = PredictorAdd13_SSE2;
 
-  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
-  VP8LTransformColorInverse = TransformColorInverse;
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_SSE2;
+  VP8LTransformColorInverse = TransformColorInverse_SSE2;
 
-  VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
-  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
-  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
-  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
-  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
+  VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE2;
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_SSE2;
+  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_SSE2;
+  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_SSE2;
+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE2;
 }
 
 #else  // !WEBP_USE_SSE2
diff --git a/thirdparty/libwebp/dsp/mips_macro.h b/thirdparty/libwebp/src/dsp/mips_macro.h
index 44aba9b71d..44aba9b71d 100644
--- a/thirdparty/libwebp/dsp/mips_macro.h
+++ b/thirdparty/libwebp/src/dsp/mips_macro.h
diff --git a/thirdparty/libwebp/dsp/msa_macro.h b/thirdparty/libwebp/src/dsp/msa_macro.h
index d0e5f45e01..dfacda6ccd 100644
--- a/thirdparty/libwebp/dsp/msa_macro.h
+++ b/thirdparty/libwebp/src/dsp/msa_macro.h
@@ -22,6 +22,7 @@
 #endif
 
 #ifdef CLANG_BUILD
+  #define ALPHAVAL  (-1)
   #define ADDVI_H(a, b)  __msa_addvi_h((v8i16)a, b)
   #define ADDVI_W(a, b)  __msa_addvi_w((v4i32)a, b)
   #define SRAI_B(a, b)  __msa_srai_b((v16i8)a, b)
@@ -32,6 +33,7 @@
   #define ANDI_B(a, b)  __msa_andi_b((v16u8)a, b)
   #define ORI_B(a, b)   __msa_ori_b((v16u8)a, b)
 #else
+  #define ALPHAVAL  (0xff)
   #define ADDVI_H(a, b)  (a + b)
   #define ADDVI_W(a, b)  (a + b)
   #define SRAI_B(a, b)  (a >> b)
diff --git a/thirdparty/libwebp/dsp/neon.h b/thirdparty/libwebp/src/dsp/neon.h
index 3b548a6855..aa1dea1301 100644
--- a/thirdparty/libwebp/dsp/neon.h
+++ b/thirdparty/libwebp/src/dsp/neon.h
@@ -14,11 +14,12 @@
 
 #include <arm_neon.h>
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 // Right now, some intrinsics functions seem slower, so we disable them
-// everywhere except aarch64 where the inline assembly is incompatible.
-#if defined(__aarch64__)
+// everywhere except newer clang/gcc or aarch64 where the inline assembly is
+// incompatible.
+#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || defined(__aarch64__)
 #define WEBP_USE_INTRINSICS   // use intrinsics when possible
 #endif
 
@@ -43,11 +44,11 @@
 // if using intrinsics, this flag avoids some functions that make gcc-4.6.3
 // crash ("internal compiler error: in immed_double_const, at emit-rtl.").
 // (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
-#if !(LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
+#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
 #define WORK_AROUND_GCC
 #endif
 
-static WEBP_INLINE int32x4x4_t Transpose4x4(const int32x4x4_t rows) {
+static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
   uint64x2x2_t row01, row23;
 
   row01.val[0] = vreinterpretq_u64_s32(rows.val[0]);
diff --git a/thirdparty/libwebp/dsp/rescaler.c b/thirdparty/libwebp/src/dsp/rescaler.c
index 0f54502352..4b6b7834e5 100644
--- a/thirdparty/libwebp/dsp/rescaler.c
+++ b/thirdparty/libwebp/src/dsp/rescaler.c
@@ -13,8 +13,8 @@
 
 #include <assert.h>
 
-#include "./dsp.h"
-#include "../utils/rescaler_utils.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/rescaler_utils.h"
 
 //------------------------------------------------------------------------------
 // Implementations of critical functions ImportRow / ExportRow
@@ -25,7 +25,8 @@
 //------------------------------------------------------------------------------
 // Row import
 
-void WebPRescalerImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {
+void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
+                                   const uint8_t* src) {
   const int x_stride = wrk->num_channels;
   const int x_out_max = wrk->dst_width * wrk->num_channels;
   int channel;
@@ -56,7 +57,8 @@ void WebPRescalerImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {
   }
 }
 
-void WebPRescalerImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {
+void WebPRescalerImportRowShrink_C(WebPRescaler* const wrk,
+                                   const uint8_t* src) {
   const int x_stride = wrk->num_channels;
   const int x_out_max = wrk->dst_width * wrk->num_channels;
   int channel;
@@ -92,7 +94,7 @@ void WebPRescalerImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {
 //------------------------------------------------------------------------------
 // Row export
 
-void WebPRescalerExportRowExpandC(WebPRescaler* const wrk) {
+void WebPRescalerExportRowExpand_C(WebPRescaler* const wrk) {
   int x_out;
   uint8_t* const dst = wrk->dst;
   rescaler_t* const irow = wrk->irow;
@@ -123,7 +125,7 @@ void WebPRescalerExportRowExpandC(WebPRescaler* const wrk) {
   }
 }
 
-void WebPRescalerExportRowShrinkC(WebPRescaler* const wrk) {
+void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) {
   int x_out;
   uint8_t* const dst = wrk->dst;
   rescaler_t* const irow = wrk->irow;
@@ -207,11 +209,14 @@ static volatile VP8CPUInfo rescaler_last_cpuinfo_used =
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
   if (rescaler_last_cpuinfo_used == VP8GetCPUInfo) return;
+#if !defined(WEBP_REDUCE_SIZE)
+#if !WEBP_NEON_OMIT_C_CODE
+  WebPRescalerExportRowExpand = WebPRescalerExportRowExpand_C;
+  WebPRescalerExportRowShrink = WebPRescalerExportRowShrink_C;
+#endif
 
-  WebPRescalerImportRowExpand = WebPRescalerImportRowExpandC;
-  WebPRescalerImportRowShrink = WebPRescalerImportRowShrinkC;
-  WebPRescalerExportRowExpand = WebPRescalerExportRowExpandC;
-  WebPRescalerExportRowShrink = WebPRescalerExportRowShrinkC;
+  WebPRescalerImportRowExpand = WebPRescalerImportRowExpand_C;
+  WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C;
 
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
@@ -219,11 +224,6 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
       WebPRescalerDspInitSSE2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      WebPRescalerDspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS32)
     if (VP8GetCPUInfo(kMIPS32)) {
       WebPRescalerDspInitMIPS32();
@@ -240,5 +240,18 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    WebPRescalerDspInitNEON();
+  }
+#endif
+
+  assert(WebPRescalerExportRowExpand != NULL);
+  assert(WebPRescalerExportRowShrink != NULL);
+  assert(WebPRescalerImportRowExpand != NULL);
+  assert(WebPRescalerImportRowShrink != NULL);
+#endif   // WEBP_REDUCE_SIZE
   rescaler_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/thirdparty/libwebp/dsp/rescaler_mips32.c b/thirdparty/libwebp/src/dsp/rescaler_mips32.c
index e09ad5d19f..542f7e5970 100644
--- a/thirdparty/libwebp/dsp/rescaler_mips32.c
+++ b/thirdparty/libwebp/src/dsp/rescaler_mips32.c
@@ -11,17 +11,18 @@
 //
 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
-#if defined(WEBP_USE_MIPS32)
+#if defined(WEBP_USE_MIPS32) && !defined(WEBP_REDUCE_SIZE)
 
 #include <assert.h>
-#include "../utils/rescaler_utils.h"
+#include "src/utils/rescaler_utils.h"
 
 //------------------------------------------------------------------------------
 // Row import
 
-static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) {
+static void ImportRowShrink_MIPS32(WebPRescaler* const wrk,
+                                   const uint8_t* src) {
   const int x_stride = wrk->num_channels;
   const int x_out_max = wrk->dst_width * wrk->num_channels;
   const int fx_scale = wrk->fx_scale;
@@ -80,7 +81,8 @@ static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) {
   }
 }
 
-static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) {
+static void ImportRowExpand_MIPS32(WebPRescaler* const wrk,
+                                   const uint8_t* src) {
   const int x_stride = wrk->num_channels;
   const int x_out_max = wrk->dst_width * wrk->num_channels;
   const int x_add = wrk->x_add;
@@ -144,7 +146,7 @@ static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) {
 //------------------------------------------------------------------------------
 // Row export
 
-static void ExportRowExpand(WebPRescaler* const wrk) {
+static void ExportRowExpand_MIPS32(WebPRescaler* const wrk) {
   uint8_t* dst = wrk->dst;
   rescaler_t* irow = wrk->irow;
   const int x_out_max = wrk->dst_width * wrk->num_channels;
@@ -207,7 +209,7 @@ static void ExportRowExpand(WebPRescaler* const wrk) {
   }
 }
 
-static void ExportRowShrink(WebPRescaler* const wrk) {
+static void ExportRowShrink_MIPS32(WebPRescaler* const wrk) {
   const int x_out_max = wrk->dst_width * wrk->num_channels;
   uint8_t* dst = wrk->dst;
   rescaler_t* irow = wrk->irow;
@@ -278,10 +280,10 @@ static void ExportRowShrink(WebPRescaler* const wrk) {
 extern void WebPRescalerDspInitMIPS32(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPS32(void) {
-  WebPRescalerImportRowExpand = ImportRowExpand;
-  WebPRescalerImportRowShrink = ImportRowShrink;
-  WebPRescalerExportRowExpand = ExportRowExpand;
-  WebPRescalerExportRowShrink = ExportRowShrink;
+  WebPRescalerImportRowExpand = ImportRowExpand_MIPS32;
+  WebPRescalerImportRowShrink = ImportRowShrink_MIPS32;
+  WebPRescalerExportRowExpand = ExportRowExpand_MIPS32;
+  WebPRescalerExportRowShrink = ExportRowShrink_MIPS32;
 }
 
 #else  // !WEBP_USE_MIPS32
diff --git a/thirdparty/libwebp/dsp/rescaler_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/rescaler_mips_dsp_r2.c
index 2308d64544..b78aac15e6 100644
--- a/thirdparty/libwebp/dsp/rescaler_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/rescaler_mips_dsp_r2.c
@@ -11,12 +11,12 @@
 //
 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
-#if defined(WEBP_USE_MIPS_DSP_R2)
+#if defined(WEBP_USE_MIPS_DSP_R2) && !defined(WEBP_REDUCE_SIZE)
 
 #include <assert.h>
-#include "../utils/rescaler_utils.h"
+#include "src/utils/rescaler_utils.h"
 
 #define ROUNDER (WEBP_RESCALER_ONE >> 1)
 #define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
@@ -24,7 +24,7 @@
 //------------------------------------------------------------------------------
 // Row export
 
-static void ExportRowShrink(WebPRescaler* const wrk) {
+static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
   int i;
   const int x_out_max = wrk->dst_width * wrk->num_channels;
   uint8_t* dst = wrk->dst;
@@ -162,7 +162,7 @@ static void ExportRowShrink(WebPRescaler* const wrk) {
   }
 }
 
-static void ExportRowExpand(WebPRescaler* const wrk) {
+static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
   int i;
   uint8_t* dst = wrk->dst;
   rescaler_t* irow = wrk->irow;
@@ -303,8 +303,8 @@ static void ExportRowExpand(WebPRescaler* const wrk) {
 extern void WebPRescalerDspInitMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
-  WebPRescalerExportRowExpand = ExportRowExpand;
-  WebPRescalerExportRowShrink = ExportRowShrink;
+  WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
+  WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
 }
 
 #else  // !WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/dsp/rescaler_msa.c b/thirdparty/libwebp/src/dsp/rescaler_msa.c
index 2c10e55d8c..f3bc99f1cd 100644
--- a/thirdparty/libwebp/dsp/rescaler_msa.c
+++ b/thirdparty/libwebp/src/dsp/rescaler_msa.c
@@ -11,14 +11,14 @@
 //
 // Author: Prashant Patil (prashant.patil@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
-#if defined(WEBP_USE_MSA)
+#if defined(WEBP_USE_MSA) && !defined(WEBP_REDUCE_SIZE)
 
 #include <assert.h>
 
-#include "../utils/rescaler_utils.h"
-#include "./msa_macro.h"
+#include "src/utils/rescaler_utils.h"
+#include "src/dsp/msa_macro.h"
 
 #define ROUNDER (WEBP_RESCALER_ONE >> 1)
 #define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
@@ -246,7 +246,7 @@ static WEBP_INLINE void ExportRowExpand_1(const uint32_t* frow, uint32_t* irow,
   }
 }
 
-static void RescalerExportRowExpand(WebPRescaler* const wrk) {
+static void RescalerExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
   uint8_t* dst = wrk->dst;
   rescaler_t* irow = wrk->irow;
   const int x_out_max = wrk->dst_width * wrk->num_channels;
@@ -411,7 +411,7 @@ static WEBP_INLINE void ExportRowShrink_1(uint32_t* irow, uint8_t* dst,
   }
 }
 
-static void RescalerExportRowShrink(WebPRescaler* const wrk) {
+static void RescalerExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
   uint8_t* dst = wrk->dst;
   rescaler_t* irow = wrk->irow;
   const int x_out_max = wrk->dst_width * wrk->num_channels;
@@ -433,8 +433,8 @@ static void RescalerExportRowShrink(WebPRescaler* const wrk) {
 extern void WebPRescalerDspInitMSA(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMSA(void) {
-  WebPRescalerExportRowExpand = RescalerExportRowExpand;
-  WebPRescalerExportRowShrink = RescalerExportRowShrink;
+  WebPRescalerExportRowExpand = RescalerExportRowExpand_MIPSdspR2;
+  WebPRescalerExportRowShrink = RescalerExportRowShrink_MIPSdspR2;
 }
 
 #else     // !WEBP_USE_MSA
diff --git a/thirdparty/libwebp/dsp/rescaler_neon.c b/thirdparty/libwebp/src/dsp/rescaler_neon.c
index b2dd8f30cc..3eff9fbaf4 100644
--- a/thirdparty/libwebp/dsp/rescaler_neon.c
+++ b/thirdparty/libwebp/src/dsp/rescaler_neon.c
@@ -11,14 +11,14 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
-#if defined(WEBP_USE_NEON)
+#if defined(WEBP_USE_NEON) && !defined(WEBP_REDUCE_SIZE)
 
 #include <arm_neon.h>
 #include <assert.h>
-#include "./neon.h"
-#include "../utils/rescaler_utils.h"
+#include "src/dsp/neon.h"
+#include "src/utils/rescaler_utils.h"
 
 #define ROUNDER (WEBP_RESCALER_ONE >> 1)
 #define MULT_FIX_C(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
@@ -41,9 +41,9 @@
 #error "MULT_FIX/WEBP_RESCALER_RFIX need some more work"
 #endif
 
-static uint32x4_t Interpolate(const rescaler_t* const frow,
-                              const rescaler_t* const irow,
-                              uint32_t A, uint32_t B) {
+static uint32x4_t Interpolate_NEON(const rescaler_t* const frow,
+                                   const rescaler_t* const irow,
+                                   uint32_t A, uint32_t B) {
   LOAD_32x4(frow, A0);
   LOAD_32x4(irow, B0);
   const uint64x2_t C0 = vmull_n_u32(vget_low_u32(A0), A);
@@ -56,7 +56,7 @@ static uint32x4_t Interpolate(const rescaler_t* const frow,
   return E;
 }
 
-static void RescalerExportRowExpand(WebPRescaler* const wrk) {
+static void RescalerExportRowExpand_NEON(WebPRescaler* const wrk) {
   int x_out;
   uint8_t* const dst = wrk->dst;
   rescaler_t* const irow = wrk->irow;
@@ -91,9 +91,9 @@ static void RescalerExportRowExpand(WebPRescaler* const wrk) {
     const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
     for (x_out = 0; x_out < max_span; x_out += 8) {
       const uint32x4_t C0 =
-          Interpolate(frow + x_out + 0, irow + x_out + 0, A, B);
+          Interpolate_NEON(frow + x_out + 0, irow + x_out + 0, A, B);
       const uint32x4_t C1 =
-          Interpolate(frow + x_out + 4, irow + x_out + 4, A, B);
+          Interpolate_NEON(frow + x_out + 4, irow + x_out + 4, A, B);
       const uint32x4_t D0 = MULT_FIX(C0, fy_scale_half);
       const uint32x4_t D1 = MULT_FIX(C1, fy_scale_half);
       const uint16x4_t E0 = vmovn_u32(D0);
@@ -112,7 +112,7 @@ static void RescalerExportRowExpand(WebPRescaler* const wrk) {
   }
 }
 
-static void RescalerExportRowShrink(WebPRescaler* const wrk) {
+static void RescalerExportRowShrink_NEON(WebPRescaler* const wrk) {
   int x_out;
   uint8_t* const dst = wrk->dst;
   rescaler_t* const irow = wrk->irow;
@@ -175,8 +175,8 @@ static void RescalerExportRowShrink(WebPRescaler* const wrk) {
 extern void WebPRescalerDspInitNEON(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitNEON(void) {
-  WebPRescalerExportRowExpand = RescalerExportRowExpand;
-  WebPRescalerExportRowShrink = RescalerExportRowShrink;
+  WebPRescalerExportRowExpand = RescalerExportRowExpand_NEON;
+  WebPRescalerExportRowShrink = RescalerExportRowShrink_NEON;
 }
 
 #else     // !WEBP_USE_NEON
diff --git a/thirdparty/libwebp/dsp/rescaler_sse2.c b/thirdparty/libwebp/src/dsp/rescaler_sse2.c
index 8271c22e05..f93b204fe1 100644
--- a/thirdparty/libwebp/dsp/rescaler_sse2.c
+++ b/thirdparty/libwebp/src/dsp/rescaler_sse2.c
@@ -11,14 +11,14 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
-#if defined(WEBP_USE_SSE2)
+#if defined(WEBP_USE_SSE2) && !defined(WEBP_REDUCE_SIZE)
 #include <emmintrin.h>
 
 #include <assert.h>
-#include "../utils/rescaler_utils.h"
-#include "../utils/utils.h"
+#include "src/utils/rescaler_utils.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // Implementations of critical functions ImportRow / ExportRow
@@ -27,7 +27,7 @@
 #define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
 
 // input: 8 bytes ABCDEFGH -> output: A0E0B0F0C0G0D0H0
-static void LoadTwoPixels(const uint8_t* const src, __m128i* out) {
+static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i A = _mm_loadl_epi64((const __m128i*)(src));  // ABCDEFGH
   const __m128i B = _mm_unpacklo_epi8(A, zero);              // A0B0C0D0E0F0G0H0
@@ -36,14 +36,14 @@ static void LoadTwoPixels(const uint8_t* const src, __m128i* out) {
 }
 
 // input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0
-static void LoadHeightPixels(const uint8_t* const src, __m128i* out) {
+static void LoadHeightPixels_SSE2(const uint8_t* const src, __m128i* out) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i A = _mm_loadl_epi64((const __m128i*)(src));  // ABCDEFGH
   *out = _mm_unpacklo_epi8(A, zero);
 }
 
-static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
-                                        const uint8_t* src) {
+static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
+                                         const uint8_t* src) {
   rescaler_t* frow = wrk->frow;
   const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels;
   const int x_add = wrk->x_add;
@@ -54,10 +54,10 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
   assert(wrk->x_expand);
   if (wrk->num_channels == 4) {
     if (wrk->src_width < 2) {
-      WebPRescalerImportRowExpandC(wrk, src);
+      WebPRescalerImportRowExpand_C(wrk, src);
       return;
     }
-    LoadTwoPixels(src, &cur_pixels);
+    LoadTwoPixels_SSE2(src, &cur_pixels);
     src += 4;
     while (1) {
       const __m128i mult = _mm_set1_epi32(((x_add - accum) << 16) | accum);
@@ -67,7 +67,7 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
       if (frow >= frow_end) break;
       accum -= wrk->x_sub;
       if (accum < 0) {
-        LoadTwoPixels(src, &cur_pixels);
+        LoadTwoPixels_SSE2(src, &cur_pixels);
         src += 4;
         accum += x_add;
       }
@@ -76,10 +76,10 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
     int left;
     const uint8_t* const src_limit = src + wrk->src_width - 8;
     if (wrk->src_width < 8) {
-      WebPRescalerImportRowExpandC(wrk, src);
+      WebPRescalerImportRowExpand_C(wrk, src);
       return;
     }
-    LoadHeightPixels(src, &cur_pixels);
+    LoadHeightPixels_SSE2(src, &cur_pixels);
     src += 7;
     left = 7;
     while (1) {
@@ -94,7 +94,7 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
         if (--left) {
           cur_pixels = _mm_srli_si128(cur_pixels, 2);
         } else if (src <= src_limit) {
-          LoadHeightPixels(src, &cur_pixels);
+          LoadHeightPixels_SSE2(src, &cur_pixels);
           src += 7;
           left = 7;
         } else {   // tail
@@ -110,8 +110,8 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
   assert(accum == 0);
 }
 
-static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk,
-                                        const uint8_t* src) {
+static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
+                                         const uint8_t* src) {
   const int x_sub = wrk->x_sub;
   int accum = 0;
   const __m128i zero = _mm_setzero_si128();
@@ -123,7 +123,7 @@ static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk,
   const rescaler_t* const frow_end = wrk->frow + 4 * wrk->dst_width;
 
   if (wrk->num_channels != 4 || wrk->x_add > (x_sub << 7)) {
-    WebPRescalerImportRowShrinkC(wrk, src);
+    WebPRescalerImportRowShrink_C(wrk, src);
     return;
   }
   assert(!WebPRescalerInputDone(wrk));
@@ -169,12 +169,12 @@ static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk,
 // Row export
 
 // load *src as epi64, multiply by mult and store result in [out0 ... out3]
-static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src,
-                                            const __m128i* const mult,
-                                            __m128i* const out0,
-                                            __m128i* const out1,
-                                            __m128i* const out2,
-                                            __m128i* const out3) {
+static WEBP_INLINE void LoadDispatchAndMult_SSE2(const rescaler_t* const src,
+                                                 const __m128i* const mult,
+                                                 __m128i* const out0,
+                                                 __m128i* const out1,
+                                                 __m128i* const out2,
+                                                 __m128i* const out3) {
   const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 0));
   const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4));
   const __m128i A2 = _mm_srli_epi64(A0, 32);
@@ -192,12 +192,12 @@ static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src,
   }
 }
 
-static WEBP_INLINE void ProcessRow(const __m128i* const A0,
-                                   const __m128i* const A1,
-                                   const __m128i* const A2,
-                                   const __m128i* const A3,
-                                   const __m128i* const mult,
-                                   uint8_t* const dst) {
+static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0,
+                                        const __m128i* const A1,
+                                        const __m128i* const A2,
+                                        const __m128i* const A3,
+                                        const __m128i* const mult,
+                                        uint8_t* const dst) {
   const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
   const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0);
   const __m128i B0 = _mm_mul_epu32(*A0, *mult);
@@ -210,7 +210,7 @@ static WEBP_INLINE void ProcessRow(const __m128i* const A0,
   const __m128i C3 = _mm_add_epi64(B3, rounder);
   const __m128i D0 = _mm_srli_epi64(C0, WEBP_RESCALER_RFIX);
   const __m128i D1 = _mm_srli_epi64(C1, WEBP_RESCALER_RFIX);
-#if (WEBP_RESCALER_FIX < 32)
+#if (WEBP_RESCALER_RFIX < 32)
   const __m128i D2 =
       _mm_and_si128(_mm_slli_epi64(C2, 32 - WEBP_RESCALER_RFIX), mask);
   const __m128i D3 =
@@ -226,7 +226,7 @@ static WEBP_INLINE void ProcessRow(const __m128i* const A0,
   _mm_storel_epi64((__m128i*)dst, G);
 }
 
-static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
+static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) {
   int x_out;
   uint8_t* const dst = wrk->dst;
   rescaler_t* const irow = wrk->irow;
@@ -240,8 +240,8 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
   if (wrk->y_accum == 0) {
     for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
       __m128i A0, A1, A2, A3;
-      LoadDispatchAndMult(frow + x_out, NULL, &A0, &A1, &A2, &A3);
-      ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
+      LoadDispatchAndMult_SSE2(frow + x_out, NULL, &A0, &A1, &A2, &A3);
+      ProcessRow_SSE2(&A0, &A1, &A2, &A3, &mult, dst + x_out);
     }
     for (; x_out < x_out_max; ++x_out) {
       const uint32_t J = frow[x_out];
@@ -257,8 +257,8 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
     const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
     for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
       __m128i A0, A1, A2, A3, B0, B1, B2, B3;
-      LoadDispatchAndMult(frow + x_out, &mA, &A0, &A1, &A2, &A3);
-      LoadDispatchAndMult(irow + x_out, &mB, &B0, &B1, &B2, &B3);
+      LoadDispatchAndMult_SSE2(frow + x_out, &mA, &A0, &A1, &A2, &A3);
+      LoadDispatchAndMult_SSE2(irow + x_out, &mB, &B0, &B1, &B2, &B3);
       {
         const __m128i C0 = _mm_add_epi64(A0, B0);
         const __m128i C1 = _mm_add_epi64(A1, B1);
@@ -272,7 +272,7 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
         const __m128i E1 = _mm_srli_epi64(D1, WEBP_RESCALER_RFIX);
         const __m128i E2 = _mm_srli_epi64(D2, WEBP_RESCALER_RFIX);
         const __m128i E3 = _mm_srli_epi64(D3, WEBP_RESCALER_RFIX);
-        ProcessRow(&E0, &E1, &E2, &E3, &mult, dst + x_out);
+        ProcessRow_SSE2(&E0, &E1, &E2, &E3, &mult, dst + x_out);
       }
     }
     for (; x_out < x_out_max; ++x_out) {
@@ -286,7 +286,7 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {
   }
 }
 
-static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
+static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
   int x_out;
   uint8_t* const dst = wrk->dst;
   rescaler_t* const irow = wrk->irow;
@@ -303,8 +303,8 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
     const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
     for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
       __m128i A0, A1, A2, A3, B0, B1, B2, B3;
-      LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);
-      LoadDispatchAndMult(frow + x_out, &mult_y, &B0, &B1, &B2, &B3);
+      LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3);
+      LoadDispatchAndMult_SSE2(frow + x_out, &mult_y, &B0, &B1, &B2, &B3);
       {
         const __m128i C0 = _mm_add_epi64(B0, rounder);
         const __m128i C1 = _mm_add_epi64(B1, rounder);
@@ -324,7 +324,7 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
         const __m128i G1 = _mm_or_si128(D1, F3);
         _mm_storeu_si128((__m128i*)(irow + x_out + 0), G0);
         _mm_storeu_si128((__m128i*)(irow + x_out + 4), G1);
-        ProcessRow(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
+        ProcessRow_SSE2(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
       }
     }
     for (; x_out < x_out_max; ++x_out) {
@@ -340,10 +340,10 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
     const __m128i zero = _mm_setzero_si128();
     for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
       __m128i A0, A1, A2, A3;
-      LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);
+      LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3);
       _mm_storeu_si128((__m128i*)(irow + x_out + 0), zero);
       _mm_storeu_si128((__m128i*)(irow + x_out + 4), zero);
-      ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
+      ProcessRow_SSE2(&A0, &A1, &A2, &A3, &mult, dst + x_out);
     }
     for (; x_out < x_out_max; ++x_out) {
       const int v = (int)MULT_FIX(irow[x_out], scale);
@@ -362,10 +362,10 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
 extern void WebPRescalerDspInitSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitSSE2(void) {
-  WebPRescalerImportRowExpand = RescalerImportRowExpandSSE2;
-  WebPRescalerImportRowShrink = RescalerImportRowShrinkSSE2;
-  WebPRescalerExportRowExpand = RescalerExportRowExpandSSE2;
-  WebPRescalerExportRowShrink = RescalerExportRowShrinkSSE2;
+  WebPRescalerImportRowExpand = RescalerImportRowExpand_SSE2;
+  WebPRescalerImportRowShrink = RescalerImportRowShrink_SSE2;
+  WebPRescalerExportRowExpand = RescalerExportRowExpand_SSE2;
+  WebPRescalerExportRowShrink = RescalerExportRowShrink_SSE2;
 }
 
 #else  // !WEBP_USE_SSE2
diff --git a/thirdparty/libwebp/src/dsp/ssim.c b/thirdparty/libwebp/src/dsp/ssim.c
new file mode 100644
index 0000000000..dc1b518a33
--- /dev/null
+++ b/thirdparty/libwebp/src/dsp/ssim.c
@@ -0,0 +1,166 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// distortion calculation
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>  // for abs()
+
+#include "src/dsp/dsp.h"
+
+#if !defined(WEBP_REDUCE_SIZE)
+
+//------------------------------------------------------------------------------
+// SSIM / PSNR
+
+// Hat-shaped 1-D filter taps (sum = 16); 2-D weights are products of two taps.
+static const uint32_t kWeight[2 * VP8_SSIM_KERNEL + 1] = {
+  1, 2, 3, 4, 3, 2, 1
+};
+static const uint32_t kWeightSum = 16 * 16;   // (sum of kWeight)^2 = total 2-D weight
+
+static WEBP_INLINE double SSIMCalculation(  // returns a ratio asserted to lie in [0, 1]
+    const VP8DistoStats* const stats, uint32_t N  /*num samples*/) {
+  const uint32_t w2 =  N * N;       // every constant below scales with N^2
+  const uint32_t C1 = 20 * w2;      // stabilizer added to the xm/ym terms of fnum and fden
+  const uint32_t C2 = 60 * w2;      // stabilizer added to the sxy/sxx/syy terms
+  const uint32_t C3 = 8 * 8 * w2;   // 'dark' limit ~= 6
+  const uint64_t xmxm = (uint64_t)stats->xm * stats->xm;
+  const uint64_t ymym = (uint64_t)stats->ym * stats->ym;
+  if (xmxm + ymym >= C3) {
+    const int64_t xmym = (int64_t)stats->xm * stats->ym;
+    const int64_t sxy = (int64_t)stats->xym * N - xmym;    // can be negative
+    const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm;
+    const uint64_t syy = (uint64_t)stats->yym * N - ymym;
+    // we descale by 8 bits (>> 8) to prevent overflow during the fnum/fden multiply.
+    const uint64_t num_S = (2 * (uint64_t)(sxy < 0 ? 0 : sxy) + C2) >> 8;  // negative sxy counts as 0
+    const uint64_t den_S = (sxx + syy + C2) >> 8;
+    const uint64_t fnum = (2 * xmym + C1) * num_S;
+    const uint64_t fden = (xmxm + ymym + C1) * den_S;
+    const double r = (double)fnum / fden;
+    assert(r >= 0. && r <= 1.0);
+    return r;
+  }
+  return 1.;   // area is too dark to contribute meaningfully
+}
+
+double VP8SSIMFromStats(const VP8DistoStats* const stats) {  // ignores stats->w
+  return SSIMCalculation(stats, kWeightSum);  // N is the constant full-kernel weight, 16 * 16
+}
+
+double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats) {
+  return SSIMCalculation(stats, stats->w);  // N is the weight sum stored alongside the stats
+}
+
+static double SSIMGetClipped_C(const uint8_t* src1, int stride1,
+                               const uint8_t* src2, int stride2,
+                               int xo, int yo, int W, int H) {  // window centered on (xo, yo)
+  VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };
+  const int ymin = (yo - VP8_SSIM_KERNEL < 0) ? 0 : yo - VP8_SSIM_KERNEL;  // rows clamped to [0, H - 1]
+  const int ymax = (yo + VP8_SSIM_KERNEL > H - 1) ? H - 1
+                                                  : yo + VP8_SSIM_KERNEL;
+  const int xmin = (xo - VP8_SSIM_KERNEL < 0) ? 0 : xo - VP8_SSIM_KERNEL;  // columns clamped to [0, W - 1]
+  const int xmax = (xo + VP8_SSIM_KERNEL > W - 1) ? W - 1
+                                                  : xo + VP8_SSIM_KERNEL;
+  int x, y;
+  src1 += ymin * stride1;  // src1/src2 point at row 0; skip to the first row of the window
+  src2 += ymin * stride2;
+  for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
+    for (x = xmin; x <= xmax; ++x) {
+      const uint32_t w = kWeight[VP8_SSIM_KERNEL + x - xo]  // tap index is relative to the center
+                       * kWeight[VP8_SSIM_KERNEL + y - yo];
+      const uint32_t s1 = src1[x];
+      const uint32_t s2 = src2[x];
+      stats.w   += w;  // weight actually used, since the window may have been clipped
+      stats.xm  += w * s1;
+      stats.ym  += w * s2;
+      stats.xxm += w * s1 * s1;
+      stats.xym += w * s1 * s2;
+      stats.yym += w * s2 * s2;
+    }
+  }
+  return VP8SSIMFromStatsClipped(&stats);  // normalizes with the accumulated stats.w
+}
+
+static double SSIMGet_C(const uint8_t* src1, int stride1,
+                        const uint8_t* src2, int stride2) {  // src1/src2: top-left sample of the window
+  VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };
+  int x, y;
+  for (y = 0; y <= 2 * VP8_SSIM_KERNEL; ++y, src1 += stride1, src2 += stride2) {
+    for (x = 0; x <= 2 * VP8_SSIM_KERNEL; ++x) {  // full window: no clamping is done here
+      const uint32_t w = kWeight[x] * kWeight[y];
+      const uint32_t s1 = src1[x];
+      const uint32_t s2 = src2[x];
+      stats.xm  += w * s1;  // stats.w is not accumulated: the total weight is the constant kWeightSum
+      stats.ym  += w * s2;
+      stats.xxm += w * s1 * s1;
+      stats.xym += w * s1 * s2;
+      stats.yym += w * s2 * s2;
+    }
+  }
+  return VP8SSIMFromStats(&stats);
+}
+
+#endif  // !defined(WEBP_REDUCE_SIZE)
+
+//------------------------------------------------------------------------------
+
+#if !defined(WEBP_DISABLE_STATS)
+static uint32_t AccumulateSSE_C(const uint8_t* src1,
+                                const uint8_t* src2, int len) {  // sum of (src1[i] - src2[i])^2
+  int i;
+  uint32_t sse2 = 0;
+  assert(len <= 65535);  // to ensure that accumulation fits within uint32_t
+  for (i = 0; i < len; ++i) {
+    const int32_t diff = src1[i] - src2[i];  // bytes promote to int, so diff is in [-255, 255]
+    sse2 += diff * diff;
+  }
+  return sse2;
+}
+#endif
+
+//------------------------------------------------------------------------------
+
+#if !defined(WEBP_REDUCE_SIZE)
+VP8SSIMGetFunc VP8SSIMGet;  // dispatch pointers; assigned in VP8SSIMDspInit()
+VP8SSIMGetClippedFunc VP8SSIMGetClipped;
+#endif
+#if !defined(WEBP_DISABLE_STATS)
+VP8AccumulateSSEFunc VP8AccumulateSSE;
+#endif
+
+extern void VP8SSIMDspInitSSE2(void);  // provided by ssim_sse2.c
+
+static volatile VP8CPUInfo ssim_last_cpuinfo_used =  // starts as its own address:
+    (VP8CPUInfo)&ssim_last_cpuinfo_used;  // presumably a value VP8GetCPUInfo never has
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) {  // installs C defaults, then SSE2 overrides
+  if (ssim_last_cpuinfo_used == VP8GetCPUInfo) return;  // already initialized for this VP8GetCPUInfo
+
+#if !defined(WEBP_REDUCE_SIZE)
+  VP8SSIMGetClipped = SSIMGetClipped_C;
+  VP8SSIMGet = SSIMGet_C;
+#endif
+
+#if !defined(WEBP_DISABLE_STATS)
+  VP8AccumulateSSE = AccumulateSSE_C;
+#endif
+
+  if (VP8GetCPUInfo != NULL) {  // without a CPU-info callback the C versions stay in place
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8SSIMDspInitSSE2();
+    }
+#endif
+  }
+
+  ssim_last_cpuinfo_used = VP8GetCPUInfo;  // record what the pointers were set up for
+}
diff --git a/thirdparty/libwebp/src/dsp/ssim_sse2.c b/thirdparty/libwebp/src/dsp/ssim_sse2.c
new file mode 100644
index 0000000000..1dcb0eb0ec
--- /dev/null
+++ b/thirdparty/libwebp/src/dsp/ssim_sse2.c
@@ -0,0 +1,165 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of distortion calculation
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "src/dsp/dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+
+#include "src/dsp/common_sse2.h"
+
+#if !defined(WEBP_DISABLE_STATS)
+
+// Helper: stores in *sum four 32b lanes, each the sum of four (a[i] - b[i])^2 terms.
+static WEBP_INLINE void SubtractAndSquare_SSE2(const __m128i a, const __m128i b,
+                                               __m128i* const sum) {
+  // take abs(a-b) in 8b
+  const __m128i a_b = _mm_subs_epu8(a, b);  // saturating: 0 wherever b > a
+  const __m128i b_a = _mm_subs_epu8(b, a);  // saturating: 0 wherever a > b
+  const __m128i abs_a_b = _mm_or_si128(a_b, b_a);  // at most one operand is non-zero per byte
+  // zero-extend to 16b
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i C0 = _mm_unpacklo_epi8(abs_a_b, zero);
+  const __m128i C1 = _mm_unpackhi_epi8(abs_a_b, zero);
+  // multiply with self
+  const __m128i sum1 = _mm_madd_epi16(C0, C0);  // squares, added pairwise into 32b lanes
+  const __m128i sum2 = _mm_madd_epi16(C1, C1);
+  *sum = _mm_add_epi32(sum1, sum2);
+}
+
+//------------------------------------------------------------------------------
+// SSIM / PSNR entry point
+
+static uint32_t AccumulateSSE_SSE2(const uint8_t* src1,
+                                   const uint8_t* src2, int len) {  // sum of (src1[i] - src2[i])^2
+  int i = 0;
+  uint32_t sse2 = 0;
+  if (len >= 16) {
+    const int limit = len - 32;  // largest 'i' from which 32 more bytes can still be loaded
+    uint32_t tmp[4];  // unsigned: the 4-lane total may exceed INT32_MAX, and must wrap, not overflow
+    __m128i sum1;
+    __m128i sum = _mm_setzero_si128();
+    __m128i a0 = _mm_loadu_si128((const __m128i*)&src1[i]);  // first 16 bytes, consumed later
+    __m128i b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
+    i += 16;
+    while (i <= limit) {  // 32 bytes per iteration; each load is issued before the previous math
+      const __m128i a1 = _mm_loadu_si128((const __m128i*)&src1[i]);
+      const __m128i b1 = _mm_loadu_si128((const __m128i*)&src2[i]);
+      __m128i sum2;
+      i += 16;
+      SubtractAndSquare_SSE2(a0, b0, &sum1);
+      sum = _mm_add_epi32(sum, sum1);
+      a0 = _mm_loadu_si128((const __m128i*)&src1[i]);
+      b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
+      i += 16;
+      SubtractAndSquare_SSE2(a1, b1, &sum2);
+      sum = _mm_add_epi32(sum, sum2);
+    }
+    SubtractAndSquare_SSE2(a0, b0, &sum1);  // flush the 16 bytes still pending in a0/b0
+    sum = _mm_add_epi32(sum, sum1);
+    _mm_storeu_si128((__m128i*)tmp, sum);
+    sse2 += (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
+  }
+
+  for (; i < len; ++i) {  // scalar tail: fewer than 32 bytes remain at this point
+    const int32_t diff = src1[i] - src2[i];
+    sse2 += diff * diff;
+  }
+  return sse2;
+}
+#endif  // !defined(WEBP_DISABLE_STATS)
+
+#if !defined(WEBP_REDUCE_SIZE)
+
+static uint32_t HorizontalAdd16b_SSE2(const __m128i* const m) {  // sum of the eight 16b lanes
+  uint16_t tmp[8];
+  const __m128i a = _mm_srli_si128(*m, 8);  // upper four lanes moved down to the lower four
+  const __m128i b = _mm_add_epi16(*m, a);   // lanes 0..3 now hold lane[k] + lane[k + 4] (16b add)
+  _mm_storeu_si128((__m128i*)tmp, b);
+  return (uint32_t)tmp[3] + tmp[2] + tmp[1] + tmp[0];  // final additions are done in 32b
+}
+
+static uint32_t HorizontalAdd32b_SSE2(const __m128i* const m) {  // sum of the four 32b lanes
+  const __m128i a = _mm_srli_si128(*m, 8);  // lanes 2,3 moved down to lanes 0,1
+  const __m128i b = _mm_add_epi32(*m, a);   // lane 0 = l0 + l2, lane 1 = l1 + l3
+  const __m128i c = _mm_add_epi32(b, _mm_srli_si128(b, 4));  // lane 0 = l0 + l1 + l2 + l3
+  return (uint32_t)_mm_cvtsi128_si32(c);  // extract lane 0
+}
+
+static const uint16_t kWeight[] = { 1, 2, 3, 4, 3, 2, 1, 0 };  // 7 taps + a zero so a 128b load fills 8 lanes
+
+#define ACCUMULATE_ROW(WEIGHT) do {  /* WEIGHT: y tap */    \
+  /* row weights: x taps (Wx) times the row y tap */        \
+  const __m128i Wy = _mm_set1_epi16((WEIGHT));              \
+  const __m128i W = _mm_mullo_epi16(Wx, Wy);                \
+  /* process 8 bytes at a time (7 bytes, actually) */       \
+  const __m128i a0 = _mm_loadl_epi64((const __m128i*)src1); \
+  const __m128i b0 = _mm_loadl_epi64((const __m128i*)src2); \
+  /* convert to 16b and multiply by weight */               \
+  const __m128i a1 = _mm_unpacklo_epi8(a0, zero);           \
+  const __m128i b1 = _mm_unpacklo_epi8(b0, zero);           \
+  const __m128i wa1 = _mm_mullo_epi16(a1, W);               \
+  const __m128i wb1 = _mm_mullo_epi16(b1, W);               \
+  /* accumulate: xm/ym in 16b lanes, the rest in 32b */     \
+  xm  = _mm_add_epi16(xm, wa1);                             \
+  ym  = _mm_add_epi16(ym, wb1);                             \
+  xxm = _mm_add_epi32(xxm, _mm_madd_epi16(a1, wa1));        \
+  xym = _mm_add_epi32(xym, _mm_madd_epi16(a1, wb1));        \
+  yym = _mm_add_epi32(yym, _mm_madd_epi16(b1, wb1));        \
+  src1 += stride1;  /* advance both sources one row */      \
+  src2 += stride2;                                          \
+} while (0)
+
+static double SSIMGet_SSE2(const uint8_t* src1, int stride1,
+                           const uint8_t* src2, int stride2) {  // src1/src2: top-left of the window
+  VP8DistoStats stats;  // .w is never assigned here; the other five fields are set below
+  const __m128i zero = _mm_setzero_si128();
+  __m128i xm = zero, ym = zero;                // 16b accums
+  __m128i xxm = zero, yym = zero, xym = zero;  // 32b accum
+  const __m128i Wx = _mm_loadu_si128((const __m128i*)kWeight);  // x taps; the 8th lane is 0
+  assert(2 * VP8_SSIM_KERNEL + 1 == 7);  // the seven calls below hard-code a 7-row window
+  ACCUMULATE_ROW(1);
+  ACCUMULATE_ROW(2);
+  ACCUMULATE_ROW(3);
+  ACCUMULATE_ROW(4);
+  ACCUMULATE_ROW(3);
+  ACCUMULATE_ROW(2);
+  ACCUMULATE_ROW(1);
+  stats.xm  = HorizontalAdd16b_SSE2(&xm);  // reduce each vector accumulator to one scalar
+  stats.ym  = HorizontalAdd16b_SSE2(&ym);
+  stats.xxm = HorizontalAdd32b_SSE2(&xxm);
+  stats.xym = HorizontalAdd32b_SSE2(&xym);
+  stats.yym = HorizontalAdd32b_SSE2(&yym);
+  return VP8SSIMFromStats(&stats);
+}
+
+#endif  // !defined(WEBP_REDUCE_SIZE)
+
+extern void VP8SSIMDspInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInitSSE2(void) {  // installs the SSE2 implementations
+#if !defined(WEBP_DISABLE_STATS)
+  VP8AccumulateSSE = AccumulateSSE_SSE2;
+#endif
+#if !defined(WEBP_REDUCE_SIZE)
+  VP8SSIMGet = SSIMGet_SSE2;  // VP8SSIMGetClipped is not assigned here
+#endif
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8SSIMDspInitSSE2)  // NOTE(review): presumably defines an empty VP8SSIMDspInitSSE2(); macro not visible here
+
+#endif  // WEBP_USE_SSE2
diff --git a/thirdparty/libwebp/dsp/upsampling.c b/thirdparty/libwebp/src/dsp/upsampling.c
index 265e722c10..e72626a82a 100644
--- a/thirdparty/libwebp/dsp/upsampling.c
+++ b/thirdparty/libwebp/src/dsp/upsampling.c
@@ -11,8 +11,8 @@
 //
 // Author: somnath@google.com (Somnath Banerjee)
 
-#include "./dsp.h"
-#include "./yuv.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/yuv.h"
 
 #include <assert.h>
 
@@ -63,17 +63,17 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
       const uint32_t uv0 = (diag_12 + tl_uv) >> 1;                             \
       const uint32_t uv1 = (diag_03 + t_uv) >> 1;                              \
       FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                          \
-           top_dst + (2 * x - 1) * XSTEP);                                     \
+           top_dst + (2 * x - 1) * (XSTEP));                                   \
       FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16),                          \
-           top_dst + (2 * x - 0) * XSTEP);                                     \
+           top_dst + (2 * x - 0) * (XSTEP));                                   \
     }                                                                          \
     if (bottom_y != NULL) {                                                    \
       const uint32_t uv0 = (diag_03 + l_uv) >> 1;                              \
       const uint32_t uv1 = (diag_12 + uv) >> 1;                                \
       FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                       \
-           bottom_dst + (2 * x - 1) * XSTEP);                                  \
+           bottom_dst + (2 * x - 1) * (XSTEP));                                \
       FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16),                       \
-           bottom_dst + (2 * x + 0) * XSTEP);                                  \
+           bottom_dst + (2 * x + 0) * (XSTEP));                                \
     }                                                                          \
     tl_uv = t_uv;                                                              \
     l_uv = uv;                                                                 \
@@ -82,24 +82,50 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
     {                                                                          \
       const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;              \
       FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16),                            \
-           top_dst + (len - 1) * XSTEP);                                       \
+           top_dst + (len - 1) * (XSTEP));                                     \
     }                                                                          \
     if (bottom_y != NULL) {                                                    \
       const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;              \
       FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16),                         \
-           bottom_dst + (len - 1) * XSTEP);                                    \
+           bottom_dst + (len - 1) * (XSTEP));                                  \
     }                                                                          \
   }                                                                            \
 }
 
 // All variants implemented.
-UPSAMPLE_FUNC(UpsampleRgbLinePair,  VP8YuvToRgb,  3)
-UPSAMPLE_FUNC(UpsampleBgrLinePair,  VP8YuvToBgr,  3)
-UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
-UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
-UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
-UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
-UPSAMPLE_FUNC(UpsampleRgb565LinePair,  VP8YuvToRgb565,  2)
+#if !WEBP_NEON_OMIT_C_CODE
+UPSAMPLE_FUNC(UpsampleRgbaLinePair_C, VP8YuvToRgba, 4)
+UPSAMPLE_FUNC(UpsampleBgraLinePair_C, VP8YuvToBgra, 4)
+#if !defined(WEBP_REDUCE_CSP)
+UPSAMPLE_FUNC(UpsampleArgbLinePair_C, VP8YuvToArgb, 4)
+UPSAMPLE_FUNC(UpsampleRgbLinePair_C,  VP8YuvToRgb,  3)
+UPSAMPLE_FUNC(UpsampleBgrLinePair_C,  VP8YuvToBgr,  3)
+UPSAMPLE_FUNC(UpsampleRgba4444LinePair_C, VP8YuvToRgba4444, 2)
+UPSAMPLE_FUNC(UpsampleRgb565LinePair_C,  VP8YuvToRgb565,  2)
+#else
+static void EmptyUpsampleFunc(const uint8_t* top_y, const uint8_t* bottom_y,
+                              const uint8_t* top_u, const uint8_t* top_v,
+                              const uint8_t* cur_u, const uint8_t* cur_v,
+                              uint8_t* top_dst, uint8_t* bottom_dst, int len) {
+  (void)top_y;  // placeholder: every argument is ignored and nothing is written
+  (void)bottom_y;
+  (void)top_u;
+  (void)top_v;
+  (void)cur_u;
+  (void)cur_v;
+  (void)top_dst;
+  (void)bottom_dst;
+  (void)len;
+  assert(0);   // COLORSPACE SUPPORT NOT COMPILED
+}
+#define UpsampleArgbLinePair_C EmptyUpsampleFunc  // aliases used when WEBP_REDUCE_CSP is defined
+#define UpsampleRgbLinePair_C EmptyUpsampleFunc
+#define UpsampleBgrLinePair_C EmptyUpsampleFunc
+#define UpsampleRgba4444LinePair_C EmptyUpsampleFunc
+#define UpsampleRgb565LinePair_C EmptyUpsampleFunc
+#endif   // WEBP_REDUCE_CSP
+
+#endif
 
 #undef LOAD_UV
 #undef UPSAMPLE_FUNC
@@ -141,7 +167,6 @@ DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb)
 
 WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
   WebPInitUpsamplers();
-  VP8YUVInit();
 #ifdef FANCY_UPSAMPLING
   return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB];
 #else
@@ -158,16 +183,33 @@ extern void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
 void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,           \
                uint8_t* dst, int len) {                                        \
   int i;                                                                       \
-  for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]);           \
+  for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * (XSTEP)]);         \
 }
 
-YUV444_FUNC(WebPYuv444ToRgbC,      VP8YuvToRgb,  3)
-YUV444_FUNC(WebPYuv444ToBgrC,      VP8YuvToBgr,  3)
-YUV444_FUNC(WebPYuv444ToRgbaC,     VP8YuvToRgba, 4)
-YUV444_FUNC(WebPYuv444ToBgraC,     VP8YuvToBgra, 4)
-YUV444_FUNC(WebPYuv444ToArgbC,     VP8YuvToArgb, 4)
-YUV444_FUNC(WebPYuv444ToRgba4444C, VP8YuvToRgba4444, 2)
-YUV444_FUNC(WebPYuv444ToRgb565C,   VP8YuvToRgb565, 2)
+YUV444_FUNC(WebPYuv444ToRgba_C,     VP8YuvToRgba, 4)
+YUV444_FUNC(WebPYuv444ToBgra_C,     VP8YuvToBgra, 4)
+#if !defined(WEBP_REDUCE_CSP)
+YUV444_FUNC(WebPYuv444ToRgb_C,      VP8YuvToRgb,  3)
+YUV444_FUNC(WebPYuv444ToBgr_C,      VP8YuvToBgr,  3)
+YUV444_FUNC(WebPYuv444ToArgb_C,     VP8YuvToArgb, 4)
+YUV444_FUNC(WebPYuv444ToRgba4444_C, VP8YuvToRgba4444, 2)
+YUV444_FUNC(WebPYuv444ToRgb565_C,   VP8YuvToRgb565, 2)
+#else
+static void EmptyYuv444Func(const uint8_t* y,
+                            const uint8_t* u, const uint8_t* v,
+                            uint8_t* dst, int len) {
+  (void)y;  // placeholder: every argument is ignored and dst is left untouched
+  (void)u;
+  (void)v;
+  (void)dst;
+  (void)len;  // NOTE(review): unlike EmptyUpsampleFunc there is no assert(0) here
+}
+#define WebPYuv444ToRgb_C EmptyYuv444Func  // aliases used when WEBP_REDUCE_CSP is defined
+#define WebPYuv444ToBgr_C EmptyYuv444Func
+#define WebPYuv444ToArgb_C EmptyYuv444Func
+#define WebPYuv444ToRgba4444_C EmptyYuv444Func
+#define WebPYuv444ToRgb565_C EmptyYuv444Func
+#endif   // WEBP_REDUCE_CSP
 
 #undef YUV444_FUNC
 
@@ -182,17 +224,17 @@ static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 =
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) {
   if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return;
 
-  WebPYUV444Converters[MODE_RGB]       = WebPYuv444ToRgbC;
-  WebPYUV444Converters[MODE_RGBA]      = WebPYuv444ToRgbaC;
-  WebPYUV444Converters[MODE_BGR]       = WebPYuv444ToBgrC;
-  WebPYUV444Converters[MODE_BGRA]      = WebPYuv444ToBgraC;
-  WebPYUV444Converters[MODE_ARGB]      = WebPYuv444ToArgbC;
-  WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444C;
-  WebPYUV444Converters[MODE_RGB_565]   = WebPYuv444ToRgb565C;
-  WebPYUV444Converters[MODE_rgbA]      = WebPYuv444ToRgbaC;
-  WebPYUV444Converters[MODE_bgrA]      = WebPYuv444ToBgraC;
-  WebPYUV444Converters[MODE_Argb]      = WebPYuv444ToArgbC;
-  WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444C;
+  WebPYUV444Converters[MODE_RGBA]      = WebPYuv444ToRgba_C;
+  WebPYUV444Converters[MODE_BGRA]      = WebPYuv444ToBgra_C;
+  WebPYUV444Converters[MODE_RGB]       = WebPYuv444ToRgb_C;
+  WebPYUV444Converters[MODE_BGR]       = WebPYuv444ToBgr_C;
+  WebPYUV444Converters[MODE_ARGB]      = WebPYuv444ToArgb_C;
+  WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444_C;
+  WebPYUV444Converters[MODE_RGB_565]   = WebPYuv444ToRgb565_C;
+  WebPYUV444Converters[MODE_rgbA]      = WebPYuv444ToRgba_C;
+  WebPYUV444Converters[MODE_bgrA]      = WebPYuv444ToBgra_C;
+  WebPYUV444Converters[MODE_Argb]      = WebPYuv444ToArgb_C;
+  WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444_C;
 
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
@@ -224,17 +266,19 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
   if (upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return;
 
 #ifdef FANCY_UPSAMPLING
-  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
-  WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair;
-  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
-  WebPUpsamplers[MODE_BGRA]      = UpsampleBgraLinePair;
-  WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
-  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
-  WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair;
-  WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair;
-  WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair;
-  WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair;
-  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+#if !WEBP_NEON_OMIT_C_CODE
+  WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair_C;
+  WebPUpsamplers[MODE_BGRA]      = UpsampleBgraLinePair_C;
+  WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair_C;
+  WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair_C;
+  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair_C;
+  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair_C;
+  WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair_C;
+  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair_C;
+  WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair_C;
+  WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair_C;
+  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair_C;
+#endif
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
@@ -243,11 +287,6 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
       WebPInitUpsamplersSSE2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      WebPInitUpsamplersNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
     if (VP8GetCPUInfo(kMIPSdspR2)) {
       WebPInitUpsamplersMIPSdspR2();
@@ -259,6 +298,26 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    WebPInitUpsamplersNEON();
+  }
+#endif
+
+  assert(WebPUpsamplers[MODE_RGBA] != NULL);
+  assert(WebPUpsamplers[MODE_BGRA] != NULL);
+  assert(WebPUpsamplers[MODE_rgbA] != NULL);
+  assert(WebPUpsamplers[MODE_bgrA] != NULL);
+  assert(WebPUpsamplers[MODE_RGB] != NULL);
+  assert(WebPUpsamplers[MODE_BGR] != NULL);
+  assert(WebPUpsamplers[MODE_ARGB] != NULL);
+  assert(WebPUpsamplers[MODE_RGBA_4444] != NULL);
+  assert(WebPUpsamplers[MODE_RGB_565] != NULL);
+  assert(WebPUpsamplers[MODE_Argb] != NULL);
+  assert(WebPUpsamplers[MODE_rgbA_4444] != NULL);
+
 #endif  // FANCY_UPSAMPLING
   upsampling_last_cpuinfo_used2 = VP8GetCPUInfo;
 }
diff --git a/thirdparty/libwebp/dsp/upsampling_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/upsampling_mips_dsp_r2.c
index ed2eb74825..10d499d771 100644
--- a/thirdparty/libwebp/dsp/upsampling_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/upsampling_mips_dsp_r2.c
@@ -12,14 +12,12 @@
 // Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
 //            Djordje Pesut  (djordje.pesut@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS_DSP_R2)
 
 #include <assert.h>
-#include "./yuv.h"
-
-#if !defined(WEBP_YUV_USE_TABLE)
+#include "src/dsp/yuv.h"
 
 #define YUV_TO_RGB(Y, U, V, R, G, B) do {                                      \
     const int t1 = MultHi(Y, 19077);                                           \
@@ -48,6 +46,7 @@
     );                                                                         \
   } while (0)
 
+#if !defined(WEBP_REDUCE_CSP)
 static WEBP_INLINE void YuvToRgb(int y, int u, int v, uint8_t* const rgb) {
   int r, g, b;
   YUV_TO_RGB(y, u, v, r, g, b);
@@ -68,7 +67,7 @@ static WEBP_INLINE void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) {
   {
     const int rg = (r & 0xf8) | (g >> 5);
     const int gb = ((g << 3) & 0xe0) | (b >> 3);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     rgb[0] = gb;
     rgb[1] = rg;
 #else
@@ -84,7 +83,7 @@ static WEBP_INLINE void YuvToRgba4444(int y, int u, int v,
   {
     const int rg = (r & 0xf0) | (g >> 4);
     const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     argb[0] = ba;
     argb[1] = rg;
 #else
@@ -93,11 +92,12 @@ static WEBP_INLINE void YuvToRgba4444(int y, int u, int v,
 #endif
    }
 }
-#endif  // WEBP_YUV_USE_TABLE
+#endif   // WEBP_REDUCE_CSP
 
 //-----------------------------------------------------------------------------
 // Alpha handling variants
 
+#if !defined(WEBP_REDUCE_CSP)
 static WEBP_INLINE void YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
                                   uint8_t* const argb) {
   int r, g, b;
@@ -107,6 +107,7 @@ static WEBP_INLINE void YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
   argb[2] = g;
   argb[3] = b;
 }
+#endif   // WEBP_REDUCE_CSP
 static WEBP_INLINE void YuvToBgra(uint8_t y, uint8_t u, uint8_t v,
                                   uint8_t* const bgra) {
   int r, g, b;
@@ -200,13 +201,15 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
 }
 
 // All variants implemented.
-UPSAMPLE_FUNC(UpsampleRgbLinePair,      YuvToRgb,      3)
-UPSAMPLE_FUNC(UpsampleBgrLinePair,      YuvToBgr,      3)
 UPSAMPLE_FUNC(UpsampleRgbaLinePair,     YuvToRgba,     4)
 UPSAMPLE_FUNC(UpsampleBgraLinePair,     YuvToBgra,     4)
+#if !defined(WEBP_REDUCE_CSP)
+UPSAMPLE_FUNC(UpsampleRgbLinePair,      YuvToRgb,      3)
+UPSAMPLE_FUNC(UpsampleBgrLinePair,      YuvToBgr,      3)
 UPSAMPLE_FUNC(UpsampleArgbLinePair,     YuvToArgb,     4)
 UPSAMPLE_FUNC(UpsampleRgba4444LinePair, YuvToRgba4444, 2)
 UPSAMPLE_FUNC(UpsampleRgb565LinePair,   YuvToRgb565,   2)
+#endif   // WEBP_REDUCE_CSP
 
 #undef LOAD_UV
 #undef UPSAMPLE_FUNC
@@ -217,17 +220,19 @@ UPSAMPLE_FUNC(UpsampleRgb565LinePair,   YuvToRgb565,   2)
 extern void WebPInitUpsamplersMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersMIPSdspR2(void) {
-  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
   WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair;
-  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
   WebPUpsamplers[MODE_BGRA]      = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair;
+#if !defined(WEBP_REDUCE_CSP)
+  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
+  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
   WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
   WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
   WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair;
-  WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair;
-  WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair;
   WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair;
   WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+#endif   // WEBP_REDUCE_CSP
 }
 
 #endif  // FANCY_UPSAMPLING
@@ -242,13 +247,15 @@ static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
   for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]);           \
 }
 
-YUV444_FUNC(Yuv444ToRgb,      YuvToRgb,      3)
-YUV444_FUNC(Yuv444ToBgr,      YuvToBgr,      3)
 YUV444_FUNC(Yuv444ToRgba,     YuvToRgba,     4)
 YUV444_FUNC(Yuv444ToBgra,     YuvToBgra,     4)
+#if !defined(WEBP_REDUCE_CSP)
+YUV444_FUNC(Yuv444ToRgb,      YuvToRgb,      3)
+YUV444_FUNC(Yuv444ToBgr,      YuvToBgr,      3)
 YUV444_FUNC(Yuv444ToArgb,     YuvToArgb,     4)
 YUV444_FUNC(Yuv444ToRgba4444, YuvToRgba4444, 2)
 YUV444_FUNC(Yuv444ToRgb565,   YuvToRgb565,   2)
+#endif   // WEBP_REDUCE_CSP
 
 #undef YUV444_FUNC
 
@@ -258,17 +265,19 @@ YUV444_FUNC(Yuv444ToRgb565,   YuvToRgb565,   2)
 extern void WebPInitYUV444ConvertersMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersMIPSdspR2(void) {
-  WebPYUV444Converters[MODE_RGB]       = Yuv444ToRgb;
   WebPYUV444Converters[MODE_RGBA]      = Yuv444ToRgba;
-  WebPYUV444Converters[MODE_BGR]       = Yuv444ToBgr;
   WebPYUV444Converters[MODE_BGRA]      = Yuv444ToBgra;
+  WebPYUV444Converters[MODE_rgbA]      = Yuv444ToRgba;
+  WebPYUV444Converters[MODE_bgrA]      = Yuv444ToBgra;
+#if !defined(WEBP_REDUCE_CSP)
+  WebPYUV444Converters[MODE_RGB]       = Yuv444ToRgb;
+  WebPYUV444Converters[MODE_BGR]       = Yuv444ToBgr;
   WebPYUV444Converters[MODE_ARGB]      = Yuv444ToArgb;
   WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444;
   WebPYUV444Converters[MODE_RGB_565]   = Yuv444ToRgb565;
-  WebPYUV444Converters[MODE_rgbA]      = Yuv444ToRgba;
-  WebPYUV444Converters[MODE_bgrA]      = Yuv444ToBgra;
   WebPYUV444Converters[MODE_Argb]      = Yuv444ToArgb;
   WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444;
+#endif   // WEBP_REDUCE_CSP
 }
 
 #else  // !WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/dsp/upsampling_msa.c b/thirdparty/libwebp/src/dsp/upsampling_msa.c
index f24926fa94..535ffb772c 100644
--- a/thirdparty/libwebp/dsp/upsampling_msa.c
+++ b/thirdparty/libwebp/src/dsp/upsampling_msa.c
@@ -12,12 +12,12 @@
 // Author: Prashant Patil (prashant.patil@imgtec.com)
 
 #include <string.h>
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MSA)
 
-#include "./msa_macro.h"
-#include "./yuv.h"
+#include "src/dsp/msa_macro.h"
+#include "src/dsp/yuv.h"
 
 #ifdef FANCY_UPSAMPLING
 
@@ -274,7 +274,7 @@ static void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) {
   const int b = Clip8(b1 >> 6);
   const int rg = (r & 0xf8) | (g >> 5);
   const int gb = ((g << 3) & 0xe0) | (b >> 3);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   rgb[0] = gb;
   rgb[1] = rg;
 #else
@@ -293,7 +293,7 @@ static void YuvToRgba4444(int y, int u, int v, uint8_t* const argb) {
   const int b = Clip8(b1 >> 6);
   const int rg = (r & 0xf0) | (g >> 4);
   const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   argb[0] = ba;
   argb[1] = rg;
 #else
@@ -374,7 +374,7 @@ static void YuvToBgrLine(const uint8_t* y, const uint8_t* u,
 static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
                           const uint8_t* v, uint8_t* dst, int length) {
   v16u8 R, G, B;
-  const v16u8 A = (v16u8)__msa_ldi_b(0xff);
+  const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
   while (length >= 16) {
     CALC_RGB16(y, u, v, R, G, B);
     STORE16_4(R, G, B, A, dst);
@@ -402,7 +402,7 @@ static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
 static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
                           const uint8_t* v, uint8_t* dst, int length) {
   v16u8 R, G, B;
-  const v16u8 A = (v16u8)__msa_ldi_b(0xff);
+  const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
   while (length >= 16) {
     CALC_RGB16(y, u, v, R, G, B);
     STORE16_4(B, G, R, A, dst);
@@ -430,7 +430,7 @@ static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
 static void YuvToArgbLine(const uint8_t* y, const uint8_t* u,
                           const uint8_t* v, uint8_t* dst, int length) {
   v16u8 R, G, B;
-  const v16u8 A = (v16u8)__msa_ldi_b(0xff);
+  const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
   while (length >= 16) {
     CALC_RGB16(y, u, v, R, G, B);
     STORE16_4(A, R, G, B, dst);
@@ -459,11 +459,11 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
                               const uint8_t* v, uint8_t* dst, int length) {
   v16u8 R, G, B, RG, BA, tmp0, tmp1;
   while (length >= 16) {
-  #ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGBA4444(y, u, v, BA, RG, 16, dst);
-  #else
+#else
     CALC_RGBA4444(y, u, v, RG, BA, 16, dst);
-  #endif
+#endif
     y      += 16;
     u      += 16;
     v      += 16;
@@ -473,7 +473,7 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
   if (length > 8) {
     uint8_t temp[2 * 16] = { 0 };
     memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGBA4444(temp, u, v, BA, RG, 16, temp);
 #else
     CALC_RGBA4444(temp, u, v, RG, BA, 16, temp);
@@ -482,7 +482,7 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
   } else if (length > 0) {
     uint8_t temp[2 * 8] = { 0 };
     memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGBA4444(temp, u, v, BA, RG, 8, temp);
 #else
     CALC_RGBA4444(temp, u, v, RG, BA, 8, temp);
@@ -495,11 +495,11 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
                             const uint8_t* v, uint8_t* dst, int length) {
   v16u8 R, G, B, RG, GB, tmp0, tmp1;
   while (length >= 16) {
-  #ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGB565(y, u, v, GB, RG, 16, dst);
-  #else
+#else
     CALC_RGB565(y, u, v, RG, GB, 16, dst);
-  #endif
+#endif
     y      += 16;
     u      += 16;
     v      += 16;
@@ -509,7 +509,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
   if (length > 8) {
     uint8_t temp[2 * 16] = { 0 };
     memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGB565(temp, u, v, GB, RG, 16, temp);
 #else
     CALC_RGB565(temp, u, v, RG, GB, 16, temp);
@@ -518,7 +518,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
   } else if (length > 0) {
     uint8_t temp[2 * 8] = { 0 };
     memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
     CALC_RGB565(temp, u, v, GB, RG, 8, temp);
 #else
     CALC_RGB565(temp, u, v, RG, GB, 8, temp);
@@ -640,13 +640,15 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y,        \
   }                                                                      \
 }
 
-UPSAMPLE_FUNC(UpsampleRgbLinePair,      YuvToRgb,      3)
-UPSAMPLE_FUNC(UpsampleBgrLinePair,      YuvToBgr,      3)
 UPSAMPLE_FUNC(UpsampleRgbaLinePair,     YuvToRgba,     4)
 UPSAMPLE_FUNC(UpsampleBgraLinePair,     YuvToBgra,     4)
+#if !defined(WEBP_REDUCE_CSP)
+UPSAMPLE_FUNC(UpsampleRgbLinePair,      YuvToRgb,      3)
+UPSAMPLE_FUNC(UpsampleBgrLinePair,      YuvToBgr,      3)
 UPSAMPLE_FUNC(UpsampleArgbLinePair,     YuvToArgb,     4)
 UPSAMPLE_FUNC(UpsampleRgba4444LinePair, YuvToRgba4444, 2)
 UPSAMPLE_FUNC(UpsampleRgb565LinePair,   YuvToRgb565,   2)
+#endif   // WEBP_REDUCE_CSP
 
 //------------------------------------------------------------------------------
 // Entry point
@@ -656,17 +658,19 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
 extern void WebPInitUpsamplersMSA(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersMSA(void) {
-  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
   WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair;
-  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
   WebPUpsamplers[MODE_BGRA]      = UpsampleBgraLinePair;
-  WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
   WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair;
   WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair;
+#if !defined(WEBP_REDUCE_CSP)
+  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
+  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
+  WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
   WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair;
   WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair;
   WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
   WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+#endif   // WEBP_REDUCE_CSP
 }
 
 #endif  // FANCY_UPSAMPLING
diff --git a/thirdparty/libwebp/dsp/upsampling_neon.c b/thirdparty/libwebp/src/dsp/upsampling_neon.c
index d371a834ff..17cbc9f911 100644
--- a/thirdparty/libwebp/dsp/upsampling_neon.c
+++ b/thirdparty/libwebp/src/dsp/upsampling_neon.c
@@ -12,15 +12,15 @@
 // Author: mans@mansr.com (Mans Rullgard)
 // Based on SSE code by: somnath@google.com (Somnath Banerjee)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_NEON)
 
 #include <assert.h>
 #include <arm_neon.h>
 #include <string.h>
-#include "./neon.h"
-#include "./yuv.h"
+#include "src/dsp/neon.h"
+#include "src/dsp/yuv.h"
 
 #ifdef FANCY_UPSAMPLING
 
@@ -58,8 +58,8 @@
 } while (0)
 
 // Turn the macro into a function for reducing code-size when non-critical
-static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
-                             uint8_t *out) {
+static void Upsample16Pixels_NEON(const uint8_t *r1, const uint8_t *r2,
+                                  uint8_t *out) {
   UPSAMPLE_16PIXELS(r1, r2, out);
 }
 
@@ -70,7 +70,7 @@ static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
   /* replicate last byte */                                             \
   memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels));    \
   memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels));    \
-  Upsample16Pixels(r1, r2, out);                                        \
+  Upsample16Pixels_NEON(r1, r2, out);                                   \
 }
 
 //-----------------------------------------------------------------------------
@@ -243,13 +243,15 @@ static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y,    \
 }
 
 // NEON variants of the fancy upsampler.
-NEON_UPSAMPLE_FUNC(UpsampleRgbLinePair,  Rgb,  3)
-NEON_UPSAMPLE_FUNC(UpsampleBgrLinePair,  Bgr,  3)
-NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePair, Rgba, 4)
-NEON_UPSAMPLE_FUNC(UpsampleBgraLinePair, Bgra, 4)
-NEON_UPSAMPLE_FUNC(UpsampleArgbLinePair, Argb, 4)
-NEON_UPSAMPLE_FUNC(UpsampleRgba4444LinePair, Rgba4444, 2)
-NEON_UPSAMPLE_FUNC(UpsampleRgb565LinePair, Rgb565, 2)
+NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePair_NEON, Rgba, 4)
+NEON_UPSAMPLE_FUNC(UpsampleBgraLinePair_NEON, Bgra, 4)
+#if !defined(WEBP_REDUCE_CSP)
+NEON_UPSAMPLE_FUNC(UpsampleRgbLinePair_NEON,  Rgb,  3)
+NEON_UPSAMPLE_FUNC(UpsampleBgrLinePair_NEON,  Bgr,  3)
+NEON_UPSAMPLE_FUNC(UpsampleArgbLinePair_NEON, Argb, 4)
+NEON_UPSAMPLE_FUNC(UpsampleRgba4444LinePair_NEON, Rgba4444, 2)
+NEON_UPSAMPLE_FUNC(UpsampleRgb565LinePair_NEON, Rgb565, 2)
+#endif   // WEBP_REDUCE_CSP
 
 //------------------------------------------------------------------------------
 // Entry point
@@ -259,17 +261,19 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
 extern void WebPInitUpsamplersNEON(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersNEON(void) {
-  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePair;
-  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
-  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePair;
-  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
-  WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
-  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
-  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
-  WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
-  WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
-  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
-  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair_NEON;
+  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair_NEON;
+  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair_NEON;
+  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair_NEON;
+#if !defined(WEBP_REDUCE_CSP)
+  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePair_NEON;
+  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePair_NEON;
+  WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair_NEON;
+  WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair_NEON;
+  WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair_NEON;
+  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair_NEON;
+  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair_NEON;
+#endif   // WEBP_REDUCE_CSP
 }
 
 #endif  // FANCY_UPSAMPLING
diff --git a/thirdparty/libwebp/dsp/upsampling_sse2.c b/thirdparty/libwebp/src/dsp/upsampling_sse2.c
index b5b668900f..fd5d303982 100644
--- a/thirdparty/libwebp/dsp/upsampling_sse2.c
+++ b/thirdparty/libwebp/src/dsp/upsampling_sse2.c
@@ -11,14 +11,14 @@
 //
 // Author: somnath@google.com (Somnath Banerjee)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_SSE2)
 
 #include <assert.h>
 #include <emmintrin.h>
 #include <string.h>
-#include "./yuv.h"
+#include "src/dsp/yuv.h"
 
 #ifdef FANCY_UPSAMPLING
 
@@ -83,13 +83,13 @@
   GET_M(ad, s, diag2);                  /* diag2 = (3a + b + c + 3d) / 8 */    \
                                                                                \
   /* pack the alternate pixels */                                              \
-  PACK_AND_STORE(a, b, diag1, diag2, out +      0);  /* store top */           \
-  PACK_AND_STORE(c, d, diag2, diag1, out + 2 * 32);  /* store bottom */        \
+  PACK_AND_STORE(a, b, diag1, diag2, (out) +      0);  /* store top */         \
+  PACK_AND_STORE(c, d, diag2, diag1, (out) + 2 * 32);  /* store bottom */      \
 }
 
 // Turn the macro into a function for reducing code-size when non-critical
-static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[],
-                             uint8_t* const out) {
+static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[],
+                                  uint8_t* const out) {
   UPSAMPLE_32PIXELS(r1, r2, out);
 }
 
@@ -101,30 +101,30 @@ static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[],
   memset(r1 + (num_pixels), r1[(num_pixels) - 1], 17 - (num_pixels));          \
   memset(r2 + (num_pixels), r2[(num_pixels) - 1], 17 - (num_pixels));          \
   /* using the shared function instead of the macro saves ~3k code size */     \
-  Upsample32Pixels(r1, r2, out);                                               \
+  Upsample32Pixels_SSE2(r1, r2, out);                                          \
 }
 
 #define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y,                              \
                     top_dst, bottom_dst, cur_x, num_pixels) {                  \
   int n;                                                                       \
   for (n = 0; n < (num_pixels); ++n) {                                         \
-    FUNC(top_y[(cur_x) + n], r_u[n], r_v[n],                                   \
-         top_dst + ((cur_x) + n) * XSTEP);                                     \
+    FUNC((top_y)[(cur_x) + n], r_u[n], r_v[n],                                 \
+         (top_dst) + ((cur_x) + n) * (XSTEP));                                 \
   }                                                                            \
-  if (bottom_y != NULL) {                                                      \
+  if ((bottom_y) != NULL) {                                                    \
     for (n = 0; n < (num_pixels); ++n) {                                       \
-      FUNC(bottom_y[(cur_x) + n], r_u[64 + n], r_v[64 + n],                    \
-           bottom_dst + ((cur_x) + n) * XSTEP);                                \
+      FUNC((bottom_y)[(cur_x) + n], r_u[64 + n], r_v[64 + n],                  \
+           (bottom_dst) + ((cur_x) + n) * (XSTEP));                            \
     }                                                                          \
   }                                                                            \
 }
 
 #define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y,                           \
                        top_dst, bottom_dst, cur_x) do {                        \
-  FUNC##32(top_y + (cur_x), r_u, r_v, top_dst + (cur_x) * XSTEP);              \
-  if (bottom_y != NULL) {                                                      \
-    FUNC##32(bottom_y + (cur_x), r_u + 64, r_v + 64,                           \
-             bottom_dst + (cur_x) * XSTEP);                                    \
+  FUNC##32_SSE2((top_y) + (cur_x), r_u, r_v, (top_dst) + (cur_x) * (XSTEP));   \
+  if ((bottom_y) != NULL) {                                                    \
+    FUNC##32_SSE2((bottom_y) + (cur_x), r_u + 64, r_v + 64,                    \
+                  (bottom_dst) + (cur_x) * (XSTEP));                           \
   }                                                                            \
 } while (0)
 
@@ -169,13 +169,16 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
 }
 
 // SSE2 variants of the fancy upsampler.
-SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePair,  VP8YuvToRgb,  3)
-SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePair,  VP8YuvToBgr,  3)
-SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
-SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
-SSE2_UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
-SSE2_UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
-SSE2_UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2)
+SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePair_SSE2, VP8YuvToRgba, 4)
+SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePair_SSE2, VP8YuvToBgra, 4)
+
+#if !defined(WEBP_REDUCE_CSP)
+SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePair_SSE2,  VP8YuvToRgb,  3)
+SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePair_SSE2,  VP8YuvToBgr,  3)
+SSE2_UPSAMPLE_FUNC(UpsampleArgbLinePair_SSE2, VP8YuvToArgb, 4)
+SSE2_UPSAMPLE_FUNC(UpsampleRgba4444LinePair_SSE2, VP8YuvToRgba4444, 2)
+SSE2_UPSAMPLE_FUNC(UpsampleRgb565LinePair_SSE2, VP8YuvToRgb565, 2)
+#endif   // WEBP_REDUCE_CSP
 
 #undef GET_M
 #undef PACK_AND_STORE
@@ -193,17 +196,19 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
 extern void WebPInitUpsamplersSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) {
-  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePair;
-  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
-  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePair;
-  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
-  WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
-  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
-  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
-  WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
-  WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
-  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
-  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair_SSE2;
+  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair_SSE2;
+  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair_SSE2;
+  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair_SSE2;
+#if !defined(WEBP_REDUCE_CSP)
+  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePair_SSE2;
+  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePair_SSE2;
+  WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair_SSE2;
+  WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair_SSE2;
+  WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair_SSE2;
+  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair_SSE2;
+  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair_SSE2;
+#endif   // WEBP_REDUCE_CSP
 }
 
 #endif  // FANCY_UPSAMPLING
@@ -213,29 +218,46 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) {
 extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
 extern void WebPInitYUV444ConvertersSSE2(void);
 
-#define YUV444_FUNC(FUNC_NAME, CALL, XSTEP) \
-extern void WebP##FUNC_NAME##C(const uint8_t* y, const uint8_t* u,             \
-                               const uint8_t* v, uint8_t* dst, int len);       \
+#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP)                            \
+extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v,       \
+                   uint8_t* dst, int len);                                     \
 static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
                       uint8_t* dst, int len) {                                 \
   int i;                                                                       \
   const int max_len = len & ~31;                                               \
-  for (i = 0; i < max_len; i += 32) CALL(y + i, u + i, v + i, dst + i * XSTEP);\
+  for (i = 0; i < max_len; i += 32) {                                          \
+    CALL(y + i, u + i, v + i, dst + i * (XSTEP));                              \
+  }                                                                            \
   if (i < len) {  /* C-fallback */                                             \
-    WebP##FUNC_NAME##C(y + i, u + i, v + i, dst + i * XSTEP, len - i);         \
+    CALL_C(y + i, u + i, v + i, dst + i * (XSTEP), len - i);                   \
   }                                                                            \
 }
 
-YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba32, 4);
-YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra32, 4);
-YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb32, 3);
-YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr32, 3);
+YUV444_FUNC(Yuv444ToRgba_SSE2, VP8YuvToRgba32_SSE2, WebPYuv444ToRgba_C, 4);
+YUV444_FUNC(Yuv444ToBgra_SSE2, VP8YuvToBgra32_SSE2, WebPYuv444ToBgra_C, 4);
+#if !defined(WEBP_REDUCE_CSP)
+YUV444_FUNC(Yuv444ToRgb_SSE2, VP8YuvToRgb32_SSE2, WebPYuv444ToRgb_C, 3);
+YUV444_FUNC(Yuv444ToBgr_SSE2, VP8YuvToBgr32_SSE2, WebPYuv444ToBgr_C, 3);
+YUV444_FUNC(Yuv444ToArgb_SSE2, VP8YuvToArgb32_SSE2, WebPYuv444ToArgb_C, 4)
+YUV444_FUNC(Yuv444ToRgba4444_SSE2, VP8YuvToRgba444432_SSE2, \
+            WebPYuv444ToRgba4444_C, 2)
+YUV444_FUNC(Yuv444ToRgb565_SSE2, VP8YuvToRgb56532_SSE2, WebPYuv444ToRgb565_C, 2)
+#endif   // WEBP_REDUCE_CSP
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE2(void) {
-  WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba;
-  WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra;
-  WebPYUV444Converters[MODE_RGB]  = Yuv444ToRgb;
-  WebPYUV444Converters[MODE_BGR]  = Yuv444ToBgr;
+  WebPYUV444Converters[MODE_RGBA]      = Yuv444ToRgba_SSE2;
+  WebPYUV444Converters[MODE_BGRA]      = Yuv444ToBgra_SSE2;
+  WebPYUV444Converters[MODE_rgbA]      = Yuv444ToRgba_SSE2;
+  WebPYUV444Converters[MODE_bgrA]      = Yuv444ToBgra_SSE2;
+#if !defined(WEBP_REDUCE_CSP)
+  WebPYUV444Converters[MODE_RGB]       = Yuv444ToRgb_SSE2;
+  WebPYUV444Converters[MODE_BGR]       = Yuv444ToBgr_SSE2;
+  WebPYUV444Converters[MODE_ARGB]      = Yuv444ToArgb_SSE2;
+  WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444_SSE2;
+  WebPYUV444Converters[MODE_RGB_565]   = Yuv444ToRgb565_SSE2;
+  WebPYUV444Converters[MODE_Argb]      = Yuv444ToArgb_SSE2;
+  WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444_SSE2;
+#endif   // WEBP_REDUCE_CSP
 }
 
 #else
diff --git a/thirdparty/libwebp/dsp/yuv.c b/thirdparty/libwebp/src/dsp/yuv.c
index dd7d9dedfa..bddf81fe09 100644
--- a/thirdparty/libwebp/dsp/yuv.c
+++ b/thirdparty/libwebp/src/dsp/yuv.c
@@ -11,63 +11,11 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./yuv.h"
+#include "src/dsp/yuv.h"
 
+#include <assert.h>
 #include <stdlib.h>
 
-#if defined(WEBP_YUV_USE_TABLE)
-
-static int done = 0;
-
-static WEBP_INLINE uint8_t clip(int v, int max_value) {
-  return v < 0 ? 0 : v > max_value ? max_value : v;
-}
-
-int16_t VP8kVToR[256], VP8kUToB[256];
-int32_t VP8kVToG[256], VP8kUToG[256];
-uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
-uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {
-  int i;
-  if (done) {
-    return;
-  }
-#ifndef USE_YUVj
-  for (i = 0; i < 256; ++i) {
-    VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
-    VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
-    VP8kVToG[i] = -45773 * (i - 128);
-    VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX;
-  }
-  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
-    const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX;
-    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
-    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
-  }
-#else
-  for (i = 0; i < 256; ++i) {
-    VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
-    VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
-    VP8kVToG[i] = -46802 * (i - 128);
-    VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
-  }
-  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
-    const int k = i;
-    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
-    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
-  }
-#endif
-
-  done = 1;
-}
-
-#else
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {}
-
-#endif  // WEBP_YUV_USE_TABLE
-
 //-----------------------------------------------------------------------------
 // Plain-C version
 
@@ -75,14 +23,14 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {}
 static void FUNC_NAME(const uint8_t* y,                                        \
                       const uint8_t* u, const uint8_t* v,                      \
                       uint8_t* dst, int len) {                                 \
-  const uint8_t* const end = dst + (len & ~1) * XSTEP;                         \
+  const uint8_t* const end = dst + (len & ~1) * (XSTEP);                       \
   while (dst != end) {                                                         \
     FUNC(y[0], u[0], v[0], dst);                                               \
-    FUNC(y[1], u[0], v[0], dst + XSTEP);                                       \
+    FUNC(y[1], u[0], v[0], dst + (XSTEP));                                     \
     y += 2;                                                                    \
     ++u;                                                                       \
     ++v;                                                                       \
-    dst += 2 * XSTEP;                                                          \
+    dst += 2 * (XSTEP);                                                        \
   }                                                                            \
   if (len & 1) {                                                               \
     FUNC(y[0], u[0], v[0], dst);                                               \
@@ -168,7 +116,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) {
 //-----------------------------------------------------------------------------
 // ARGB -> YUV converters
 
-static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
+static void ConvertARGBToY_C(const uint32_t* argb, uint8_t* y, int width) {
   int i;
   for (i = 0; i < width; ++i) {
     const uint32_t p = argb[i];
@@ -220,14 +168,14 @@ void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
 
 //-----------------------------------------------------------------------------
 
-static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
+static void ConvertRGB24ToY_C(const uint8_t* rgb, uint8_t* y, int width) {
   int i;
   for (i = 0; i < width; ++i, rgb += 3) {
     y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
   }
 }
 
-static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
+static void ConvertBGR24ToY_C(const uint8_t* bgr, uint8_t* y, int width) {
   int i;
   for (i = 0; i < width; ++i, bgr += 3) {
     y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
@@ -246,6 +194,7 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
 
 //-----------------------------------------------------------------------------
 
+#if !WEBP_NEON_OMIT_C_CODE
 #define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
 static uint16_t clip_y(int v) {
   return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
@@ -283,6 +232,7 @@ static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
     out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 #undef MAX_Y
 
@@ -308,22 +258,26 @@ static volatile VP8CPUInfo rgba_to_yuv_last_cpuinfo_used =
     (VP8CPUInfo)&rgba_to_yuv_last_cpuinfo_used;
 
 extern void WebPInitConvertARGBToYUVSSE2(void);
+extern void WebPInitConvertARGBToYUVNEON(void);
 extern void WebPInitSharpYUVSSE2(void);
+extern void WebPInitSharpYUVNEON(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) {
   if (rgba_to_yuv_last_cpuinfo_used == VP8GetCPUInfo) return;
 
-  WebPConvertARGBToY = ConvertARGBToY;
+  WebPConvertARGBToY = ConvertARGBToY_C;
   WebPConvertARGBToUV = WebPConvertARGBToUV_C;
 
-  WebPConvertRGB24ToY = ConvertRGB24ToY;
-  WebPConvertBGR24ToY = ConvertBGR24ToY;
+  WebPConvertRGB24ToY = ConvertRGB24ToY_C;
+  WebPConvertBGR24ToY = ConvertBGR24ToY_C;
 
   WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
 
+#if !WEBP_NEON_OMIT_C_CODE
   WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
   WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
   WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
+#endif
 
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
@@ -333,5 +287,23 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) {
     }
 #endif  // WEBP_USE_SSE2
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    WebPInitConvertARGBToYUVNEON();
+    WebPInitSharpYUVNEON();
+  }
+#endif  // WEBP_USE_NEON
+
+  assert(WebPConvertARGBToY != NULL);
+  assert(WebPConvertARGBToUV != NULL);
+  assert(WebPConvertRGB24ToY != NULL);
+  assert(WebPConvertBGR24ToY != NULL);
+  assert(WebPConvertRGBA32ToUV != NULL);
+  assert(WebPSharpYUVUpdateY != NULL);
+  assert(WebPSharpYUVUpdateRGB != NULL);
+  assert(WebPSharpYUVFilterRow != NULL);
+
   rgba_to_yuv_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/thirdparty/libwebp/dsp/yuv.h b/thirdparty/libwebp/src/dsp/yuv.h
index 1d33b5863b..c8a55832d4 100644
--- a/thirdparty/libwebp/dsp/yuv.h
+++ b/thirdparty/libwebp/src/dsp/yuv.h
@@ -35,18 +35,8 @@
 #ifndef WEBP_DSP_YUV_H_
 #define WEBP_DSP_YUV_H_
 
-#include "./dsp.h"
-#include "../dec/vp8_dec.h"
-
-#if defined(WEBP_EXPERIMENTAL_FEATURES)
-// Do NOT activate this feature for real compression. This is only experimental!
-// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace.
-// This colorspace is close to Rec.601's Y'CbCr model with the notable
-// difference of allowing larger range for luma/chroma.
-// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its
-// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
-// #define USE_YUVj
-#endif
+#include "src/dsp/dsp.h"
+#include "src/dec/vp8_dec.h"
 
 //------------------------------------------------------------------------------
 // YUV -> RGB conversion
@@ -58,12 +48,8 @@ extern "C" {
 enum {
   YUV_FIX = 16,                    // fixed-point precision for RGB->YUV
   YUV_HALF = 1 << (YUV_FIX - 1),
-  YUV_MASK = (256 << YUV_FIX) - 1,
-  YUV_RANGE_MIN = -227,            // min value of r/g/b output
-  YUV_RANGE_MAX = 256 + 226,       // max value of r/g/b output
 
   YUV_FIX2 = 6,                   // fixed-point precision for YUV->RGB
-  YUV_HALF2 = 1 << YUV_FIX2 >> 1,
   YUV_MASK2 = (256 << YUV_FIX2) - 1
 };
 
@@ -111,7 +97,7 @@ static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v,
   const int b = VP8YUVToB(y, u);      // 5 usable bits
   const int rg = (r & 0xf8) | (g >> 5);
   const int gb = ((g << 3) & 0xe0) | (b >> 3);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   rgb[0] = gb;
   rgb[1] = rg;
 #else
@@ -127,7 +113,7 @@ static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v,
   const int b = VP8YUVToB(y, u);        // 4 usable bits
   const int rg = (r & 0xf0) | (g >> 4);
   const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
   argb[0] = ba;
   argb[1] = rg;
 #else
@@ -157,29 +143,26 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
   rgba[3] = 0xff;
 }
 
-// Must be called before everything, to initialize the tables.
-void VP8YUVInit(void);
-
 //-----------------------------------------------------------------------------
 // SSE2 extra functions (mostly for upsampling_sse2.c)
 
 #if defined(WEBP_USE_SSE2)
 
 // Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
-void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                    uint8_t* dst);
-void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                   uint8_t* dst);
-void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                    uint8_t* dst);
-void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                   uint8_t* dst);
-void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                    uint8_t* dst);
-void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                         uint8_t* dst);
+void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                        uint8_t* dst);
+void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                         uint8_t* dst);
+void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                         uint8_t* dst);
-void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                      uint8_t* dst);
+void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                         uint8_t* dst);
+void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
+                             const uint8_t* v, uint8_t* dst);
+void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                           uint8_t* dst);
 
 #endif    // WEBP_USE_SSE2
 
@@ -192,8 +175,6 @@ static WEBP_INLINE int VP8ClipUV(int uv, int rounding) {
   return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255;
 }
 
-#ifndef USE_YUVj
-
 static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
   const int luma = 16839 * r + 33059 * g + 6420 * b;
   return (luma + rounding + (16 << YUV_FIX)) >> YUV_FIX;  // no need to clip
@@ -209,28 +190,6 @@ static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
   return VP8ClipUV(v, rounding);
 }
 
-#else
-
-// This JPEG-YUV colorspace, only for comparison!
-// These are also 16bit precision coefficients from Rec.601, but with full
-// [0..255] output range.
-static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
-  const int luma = 19595 * r + 38470 * g + 7471 * b;
-  return (luma + rounding) >> YUV_FIX;  // no need to clip
-}
-
-static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
-  const int u = -11058 * r - 21710 * g + 32768 * b;
-  return VP8ClipUV(u, rounding);
-}
-
-static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
-  const int v = 32768 * r - 27439 * g - 5329 * b;
-  return VP8ClipUV(v, rounding);
-}
-
-#endif    // USE_YUVj
-
 #ifdef __cplusplus
 }    // extern "C"
 #endif
diff --git a/thirdparty/libwebp/dsp/yuv_mips32.c b/thirdparty/libwebp/src/dsp/yuv_mips32.c
index e61aac571f..9d0a887824 100644
--- a/thirdparty/libwebp/dsp/yuv_mips32.c
+++ b/thirdparty/libwebp/src/dsp/yuv_mips32.c
@@ -12,11 +12,11 @@
 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS32)
 
-#include "./yuv.h"
+#include "src/dsp/yuv.h"
 
 //------------------------------------------------------------------------------
 // simple point-sampling
@@ -77,10 +77,10 @@ static void FUNC_NAME(const uint8_t* y,                                        \
   }                                                                            \
 }
 
-ROW_FUNC(YuvToRgbRow,      3, 0, 1, 2, 0)
-ROW_FUNC(YuvToRgbaRow,     4, 0, 1, 2, 3)
-ROW_FUNC(YuvToBgrRow,      3, 2, 1, 0, 0)
-ROW_FUNC(YuvToBgraRow,     4, 2, 1, 0, 3)
+ROW_FUNC(YuvToRgbRow_MIPS32,      3, 0, 1, 2, 0)
+ROW_FUNC(YuvToRgbaRow_MIPS32,     4, 0, 1, 2, 3)
+ROW_FUNC(YuvToBgrRow_MIPS32,      3, 2, 1, 0, 0)
+ROW_FUNC(YuvToBgraRow_MIPS32,     4, 2, 1, 0, 3)
 
 #undef ROW_FUNC
 
@@ -90,10 +90,10 @@ ROW_FUNC(YuvToBgraRow,     4, 2, 1, 0, 3)
 extern void WebPInitSamplersMIPS32(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersMIPS32(void) {
-  WebPSamplers[MODE_RGB]  = YuvToRgbRow;
-  WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
-  WebPSamplers[MODE_BGR]  = YuvToBgrRow;
-  WebPSamplers[MODE_BGRA] = YuvToBgraRow;
+  WebPSamplers[MODE_RGB]  = YuvToRgbRow_MIPS32;
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRow_MIPS32;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRow_MIPS32;
+  WebPSamplers[MODE_BGRA] = YuvToBgraRow_MIPS32;
 }
 
 #else  // !WEBP_USE_MIPS32
diff --git a/thirdparty/libwebp/dsp/yuv_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/yuv_mips_dsp_r2.c
index 1720d4190f..cc8afcc756 100644
--- a/thirdparty/libwebp/dsp/yuv_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/yuv_mips_dsp_r2.c
@@ -12,11 +12,11 @@
 // Author(s):  Branimir Vasic (branimir.vasic@imgtec.com)
 //             Djordje Pesut  (djordje.pesut@imgtec.com)
 
-#include "./dsp.h"
+#include "src/dsp/dsp.h"
 
 #if defined(WEBP_USE_MIPS_DSP_R2)
 
-#include "./yuv.h"
+#include "src/dsp/yuv.h"
 
 //------------------------------------------------------------------------------
 // simple point-sampling
@@ -105,10 +105,10 @@ static void FUNC_NAME(const uint8_t* y,                                        \
   }                                                                            \
 }
 
-ROW_FUNC(YuvToRgbRow,      3, 0, 1, 2, 0)
-ROW_FUNC(YuvToRgbaRow,     4, 0, 1, 2, 3)
-ROW_FUNC(YuvToBgrRow,      3, 2, 1, 0, 0)
-ROW_FUNC(YuvToBgraRow,     4, 2, 1, 0, 3)
+ROW_FUNC(YuvToRgbRow_MIPSdspR2,      3, 0, 1, 2, 0)
+ROW_FUNC(YuvToRgbaRow_MIPSdspR2,     4, 0, 1, 2, 3)
+ROW_FUNC(YuvToBgrRow_MIPSdspR2,      3, 2, 1, 0, 0)
+ROW_FUNC(YuvToBgraRow_MIPSdspR2,     4, 2, 1, 0, 3)
 
 #undef ROW_FUNC
 #undef ASM_CLOBBER_LIST
@@ -121,10 +121,10 @@ ROW_FUNC(YuvToBgraRow,     4, 2, 1, 0, 3)
 extern void WebPInitSamplersMIPSdspR2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersMIPSdspR2(void) {
-  WebPSamplers[MODE_RGB]  = YuvToRgbRow;
-  WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
-  WebPSamplers[MODE_BGR]  = YuvToBgrRow;
-  WebPSamplers[MODE_BGRA] = YuvToBgraRow;
+  WebPSamplers[MODE_RGB]  = YuvToRgbRow_MIPSdspR2;
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRow_MIPSdspR2;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRow_MIPSdspR2;
+  WebPSamplers[MODE_BGRA] = YuvToBgraRow_MIPSdspR2;
 }
 
 #else  // !WEBP_USE_MIPS_DSP_R2
diff --git a/thirdparty/libwebp/src/dsp/yuv_neon.c b/thirdparty/libwebp/src/dsp/yuv_neon.c
new file mode 100644
index 0000000000..a34d60248f
--- /dev/null
+++ b/thirdparty/libwebp/src/dsp/yuv_neon.c
@@ -0,0 +1,288 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV->RGB conversion functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "src/dsp/yuv.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "src/dsp/neon.h"
+
+//-----------------------------------------------------------------------------
+// Returns 8 lumas: sat8(((16839*R + 33059*G + 6420*B + 0x8000) >> 16) + 16).
+static uint8x8_t ConvertRGBToY_NEON(const uint8x8_t R,
+                                    const uint8x8_t G,
+                                    const uint8x8_t B) {
+  const uint16x8_t r = vmovl_u8(R);   // widen the 8b inputs to 16b lanes
+  const uint16x8_t g = vmovl_u8(G);
+  const uint16x8_t b = vmovl_u8(B);
+  const uint16x4_t r_lo = vget_low_u16(r);   // split: products need 32b lanes
+  const uint16x4_t r_hi = vget_high_u16(r);
+  const uint16x4_t g_lo = vget_low_u16(g);
+  const uint16x4_t g_hi = vget_high_u16(g);
+  const uint16x4_t b_lo = vget_low_u16(b);
+  const uint16x4_t b_hi = vget_high_u16(b);
+  const uint32x4_t tmp0_lo = vmull_n_u16(         r_lo, 16839u);
+  const uint32x4_t tmp0_hi = vmull_n_u16(         r_hi, 16839u);
+  const uint32x4_t tmp1_lo = vmlal_n_u16(tmp0_lo, g_lo, 33059u);
+  const uint32x4_t tmp1_hi = vmlal_n_u16(tmp0_hi, g_hi, 33059u);
+  const uint32x4_t tmp2_lo = vmlal_n_u16(tmp1_lo, b_lo, 6420u);
+  const uint32x4_t tmp2_hi = vmlal_n_u16(tmp1_hi, b_hi, 6420u);
+  const uint16x8_t Y1 = vcombine_u16(vrshrn_n_u32(tmp2_lo, 16),
+                                     vrshrn_n_u32(tmp2_hi, 16));  // rounded >> 16
+  const uint16x8_t Y2 = vaddq_u16(Y1, vdupq_n_u16(16));   // add the 16 offset
+  return vqmovn_u16(Y2);   // saturating narrow back to 8b
+}
+// Converts 'width' packed R,G,B byte triplets to luma, 8 pixels at a time.
+static void ConvertRGB24ToY_NEON(const uint8_t* rgb, uint8_t* y, int width) {
+  int i;
+  for (i = 0; i + 8 <= width; i += 8, rgb += 3 * 8) {
+    const uint8x8x3_t RGB = vld3_u8(rgb);   // de-interleaving load of 8 pixels
+    const uint8x8_t Y = ConvertRGBToY_NEON(RGB.val[0], RGB.val[1], RGB.val[2]);
+    vst1_u8(y + i, Y);
+  }
+  for (; i < width; ++i, rgb += 3) {   // left-over pixels: scalar VP8RGBToY()
+    y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
+  }
+}
+// Converts 'width' packed B,G,R byte triplets to luma, 8 pixels at a time.
+static void ConvertBGR24ToY_NEON(const uint8_t* bgr, uint8_t* y, int width) {
+  int i;
+  for (i = 0; i + 8 <= width; i += 8, bgr += 3 * 8) {
+    const uint8x8x3_t BGR = vld3_u8(bgr);   // de-interleaving load of 8 pixels
+    const uint8x8_t Y = ConvertRGBToY_NEON(BGR.val[2], BGR.val[1], BGR.val[0]);
+    vst1_u8(y + i, Y);
+  }
+  for (; i < width; ++i, bgr += 3) {  // left-over pixels: scalar VP8RGBToY()
+    y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
+  }
+}
+// Converts 'width' 32b argb[] words to luma, 8 pixels at a time.
+static void ConvertARGBToY_NEON(const uint32_t* argb, uint8_t* y, int width) {
+  int i;
+  for (i = 0; i + 8 <= width; i += 8) {
+    const uint8x8x4_t RGB = vld4_u8((const uint8_t*)&argb[i]);  // val[3] unused
+    const uint8x8_t Y = ConvertRGBToY_NEON(RGB.val[2], RGB.val[1], RGB.val[0]);
+    vst1_u8(y + i, Y);
+  }
+  for (; i < width; ++i) {   // left-over pixels: scalar VP8RGBToY()
+    const uint32_t p = argb[i];
+    y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >>  0) & 0xff,
+                     YUV_HALF);
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+// Declares the signed low/high halves of r, g, b (uint16x8_t) for MULTIPLY_16b.
+#define MULTIPLY_16b_PREAMBLE(r, g, b)                           \
+  const int16x4_t r_lo = vreinterpret_s16_u16(vget_low_u16(r));  \
+  const int16x4_t r_hi = vreinterpret_s16_u16(vget_high_u16(r)); \
+  const int16x4_t g_lo = vreinterpret_s16_u16(vget_low_u16(g));  \
+  const int16x4_t g_hi = vreinterpret_s16_u16(vget_high_u16(g)); \
+  const int16x4_t b_lo = vreinterpret_s16_u16(vget_low_u16(b));  \
+  const int16x4_t b_hi = vreinterpret_s16_u16(vget_high_u16(b))
+// computes: DST_s16 = [(C0 * r + C1 * g + C2 * b) >> 16] + CST   (per lane)
+#define MULTIPLY_16b(C0, C1, C2, CST, DST_s16) do {              \
+  const int32x4_t tmp0_lo = vmull_n_s16(         r_lo, C0);      \
+  const int32x4_t tmp0_hi = vmull_n_s16(         r_hi, C0);      \
+  const int32x4_t tmp1_lo = vmlal_n_s16(tmp0_lo, g_lo, C1);      \
+  const int32x4_t tmp1_hi = vmlal_n_s16(tmp0_hi, g_hi, C1);      \
+  const int32x4_t tmp2_lo = vmlal_n_s16(tmp1_lo, b_lo, C2);      \
+  const int32x4_t tmp2_hi = vmlal_n_s16(tmp1_hi, b_hi, C2);      \
+  const int16x8_t tmp3 = vcombine_s16(vshrn_n_s32(tmp2_lo, 16),  \
+                                      vshrn_n_s32(tmp2_hi, 16)); \
+  DST_s16 = vaddq_s16(tmp3, vdupq_n_s16(CST));                   \
+} while (0)   // relies on the r_lo..b_hi names from MULTIPLY_16b_PREAMBLE
+
+// This needs to be a macro, since (128 << SHIFT) needs to be an immediate.
+#define CONVERT_RGB_TO_UV(r, g, b, SHIFT, U_DST, V_DST) do {     \
+  MULTIPLY_16b_PREAMBLE(r, g, b);                                \
+  MULTIPLY_16b(-9719, -19081, 28800, 128 << SHIFT, U_DST);       \
+  MULTIPLY_16b(28800, -24116, -4684, 128 << SHIFT, V_DST);       \
+} while (0)   // U_DST / V_DST: int16x8_t, still scaled by (1 << SHIFT)
+// Converts 'width' groups of four uint16_t (r, g, b, 4th unused) to 8b U and V.
+static void ConvertRGBA32ToUV_NEON(const uint16_t* rgb,
+                                   uint8_t* u, uint8_t* v, int width) {
+  int i;
+  for (i = 0; i + 8 <= width; i += 8, rgb += 4 * 8) {
+    const uint16x8x4_t RGB = vld4q_u16((const uint16_t*)rgb);
+    int16x8_t U, V;
+    CONVERT_RGB_TO_UV(RGB.val[0], RGB.val[1], RGB.val[2], 2, U, V);
+    vst1_u8(u + i, vqrshrun_n_s16(U, 2));   // round, >> 2, saturate to 8b
+    vst1_u8(v + i, vqrshrun_n_s16(V, 2));
+  }
+  for (; i < width; i += 1, rgb += 4) {   // left-over: scalar VP8RGBToU/V()
+    const int r = rgb[0], g = rgb[1], b = rgb[2];
+    u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2);
+    v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2);
+  }
+}
+// Computes U/V from horizontal pairs of argb[] pixels (8 U/V per 16 pixels).
+static void ConvertARGBToUV_NEON(const uint32_t* argb, uint8_t* u, uint8_t* v,
+                                 int src_width, int do_store) {
+  int i;
+  for (i = 0; i + 16 <= src_width; i += 16, u += 8, v += 8) {
+    const uint8x16x4_t RGB = vld4q_u8((const uint8_t*)&argb[i]);
+    const uint16x8_t R = vpaddlq_u8(RGB.val[2]);  // pair-wise adds
+    const uint16x8_t G = vpaddlq_u8(RGB.val[1]);
+    const uint16x8_t B = vpaddlq_u8(RGB.val[0]);
+    int16x8_t U_tmp, V_tmp;
+    CONVERT_RGB_TO_UV(R, G, B, 1, U_tmp, V_tmp);
+    {
+      const uint8x8_t U = vqrshrun_n_s16(U_tmp, 1);   // round, >> 1, saturate
+      const uint8x8_t V = vqrshrun_n_s16(V_tmp, 1);
+      if (do_store) {   // overwrite u[] / v[]
+        vst1_u8(u, U);
+        vst1_u8(v, V);
+      } else {          // rounded average with the values already in u[] / v[]
+        const uint8x8_t prev_u = vld1_u8(u);
+        const uint8x8_t prev_v = vld1_u8(v);
+        vst1_u8(u, vrhadd_u8(U, prev_u));
+        vst1_u8(v, vrhadd_u8(V, prev_v));
+      }
+    }
+  }
+  if (i < src_width) {  // left-over, delegated to the plain-C version
+    WebPConvertARGBToUV_C(argb + i, u, v, src_width - i, do_store);
+  }
+}
+
+
+//------------------------------------------------------------------------------
+// Points the RGB->YUV converter hooks at the NEON functions defined above.
+extern void WebPInitConvertARGBToYUVNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
+  WebPConvertRGB24ToY = ConvertRGB24ToY_NEON;
+  WebPConvertBGR24ToY = ConvertBGR24ToY_NEON;
+  WebPConvertARGBToY = ConvertARGBToY_NEON;
+  WebPConvertARGBToUV = ConvertARGBToUV_NEON;
+  WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
+}
+
+//------------------------------------------------------------------------------
+// Scalar helper for the left-over loops below: clamps 'v' to [0, MAX_Y].
+#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
+static uint16_t clip_y_NEON(int v) {
+  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
+}
+// dst[i] = clip(dst[i] + ref[i] - src[i]); returns sum of |ref[i] - src[i]|.
+static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
+                                     uint16_t* dst, int len) {
+  int i;
+  const int16x8_t zero = vdupq_n_s16(0);
+  const int16x8_t max = vdupq_n_s16(MAX_Y);
+  uint64x2_t sum = vdupq_n_u64(0);
+  uint64_t diff;
+
+  for (i = 0; i + 8 <= len; i += 8) {
+    const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
+    const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
+    const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
+    const int16x8_t D = vsubq_s16(A, B);       // diff_y
+    const int16x8_t F = vaddq_s16(C, D);       // new_y
+    const uint16x8_t H =
+        vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));  // clip
+    const int16x8_t I = vabsq_s16(D);          // abs(diff_y)
+    vst1q_u16(dst + i, H);
+    sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));  // 64b acc.
+  }
+  diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);   // fold both lanes
+  for (; i < len; ++i) {   // left-over samples, scalar
+    const int diff_y = ref[i] - src[i];
+    const int new_y = (int)(dst[i]) + diff_y;
+    dst[i] = clip_y_NEON(new_y);
+    diff += (uint64_t)(abs(diff_y));
+  }
+  return diff;
+}
+// dst[i] += ref[i] - src[i] for i in [0, len); no clipping is applied.
+static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
+                                   int16_t* dst, int len) {
+  int i;
+  for (i = 0; i + 8 <= len; i += 8) {
+    const int16x8_t A = vld1q_s16(ref + i);
+    const int16x8_t B = vld1q_s16(src + i);
+    const int16x8_t C = vld1q_s16(dst + i);
+    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
+    const int16x8_t E = vaddq_s16(C, D);   // new_uv
+    vst1q_s16(dst + i, E);
+  }
+  for (; i < len; ++i) {   // left-over samples, scalar
+    const int diff_uv = ref[i] - src[i];
+    dst[i] += diff_uv;
+  }
+}
+// Adds a (9,3,3,1)/16 blend of A[i], A[i+1], B[i], B[i+1] to best_y[], clipped.
+static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
+                                   const uint16_t* best_y, uint16_t* out) {
+  int i;
+  const int16x8_t max = vdupq_n_s16(MAX_Y);
+  const int16x8_t zero = vdupq_n_s16(0);
+  for (i = 0; i + 8 <= len; i += 8) {   // 8 inputs -> 16 outputs per iteration
+    const int16x8_t a0 = vld1q_s16(A + i + 0);
+    const int16x8_t a1 = vld1q_s16(A + i + 1);
+    const int16x8_t b0 = vld1q_s16(B + i + 0);
+    const int16x8_t b1 = vld1q_s16(B + i + 1);
+    const int16x8_t a0b1 = vaddq_s16(a0, b1);
+    const int16x8_t a1b0 = vaddq_s16(a1, b0);
+    const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1
+    const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1)
+    const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0)
+    const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
+    const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
+    const int16x8_t d0 = vaddq_s16(c1, a0);
+    const int16x8_t d1 = vaddq_s16(c0, a1);
+    const int16x8_t e0 = vrshrq_n_s16(d0, 1);   // rounding >> 1
+    const int16x8_t e1 = vrshrq_n_s16(d1, 1);
+    const int16x8x2_t f = vzipq_s16(e0, e1);    // interleave e0[k], e1[k]
+    const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
+    const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
+    const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
+    const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
+    const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);   // clip
+    const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
+    vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
+    vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
+  }
+  for (; i < len; ++i) {   // left-over samples, scalar
+    const int a0b1 = A[i + 0] + B[i + 1];
+    const int a1b0 = A[i + 1] + B[i + 0];
+    const int a0a1b0b1 = a0b1 + a1b0 + 8;   // '+ 8' is the rounding term
+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;  // 9A0+3A1+3B0+B1
+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;  // 9A1+3A0+3B1+B0
+    out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
+    out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
+  }
+}
+#undef MAX_Y
+
+//------------------------------------------------------------------------------
+// Points the three WebPSharpYUV* hooks at the NEON functions defined above.
+extern void WebPInitSharpYUVNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
+  WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
+  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
+  WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
+}
+
+#else  // !WEBP_USE_NEON
+// NOTE(review): WEBP_DSP_INIT_STUB is defined elsewhere; presumably no-op stubs.
+WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
+WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)
+
+#endif  // WEBP_USE_NEON
diff --git a/thirdparty/libwebp/dsp/yuv_sse2.c b/thirdparty/libwebp/src/dsp/yuv_sse2.c
index e33c2bbafd..6810bf8d15 100644
--- a/thirdparty/libwebp/dsp/yuv_sse2.c
+++ b/thirdparty/libwebp/src/dsp/yuv_sse2.c
@@ -11,11 +11,11 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./yuv.h"
+#include "src/dsp/yuv.h"
 
 #if defined(WEBP_USE_SSE2)
 
-#include "./common_sse2.h"
+#include "src/dsp/common_sse2.h"
 #include <stdlib.h>
 #include <emmintrin.h>
 
@@ -26,12 +26,12 @@
 // R = (19077 * y             + 26149 * v - 14234) >> 6
 // G = (19077 * y -  6419 * u - 13320 * v +  8708) >> 6
 // B = (19077 * y + 33050 * u             - 17685) >> 6
-static void ConvertYUV444ToRGB(const __m128i* const Y0,
-                               const __m128i* const U0,
-                               const __m128i* const V0,
-                               __m128i* const R,
-                               __m128i* const G,
-                               __m128i* const B) {
+static void ConvertYUV444ToRGB_SSE2(const __m128i* const Y0,
+                                    const __m128i* const U0,
+                                    const __m128i* const V0,
+                                    __m128i* const R,
+                                    __m128i* const G,
+                                    __m128i* const B) {
   const __m128i k19077 = _mm_set1_epi16(19077);
   const __m128i k26149 = _mm_set1_epi16(26149);
   const __m128i k14234 = _mm_set1_epi16(14234);
@@ -66,13 +66,13 @@ static void ConvertYUV444ToRGB(const __m128i* const Y0,
 }
 
 // Load the bytes into the *upper* part of 16b words. That's "<< 8", basically.
-static WEBP_INLINE __m128i Load_HI_16(const uint8_t* src) {
+static WEBP_INLINE __m128i Load_HI_16_SSE2(const uint8_t* src) {
   const __m128i zero = _mm_setzero_si128();
   return _mm_unpacklo_epi8(zero, _mm_loadl_epi64((const __m128i*)src));
 }
 
 // Load and replicate the U/V samples
-static WEBP_INLINE __m128i Load_UV_HI_8(const uint8_t* src) {
+static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src);
   const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0);
@@ -80,29 +80,33 @@ static WEBP_INLINE __m128i Load_UV_HI_8(const uint8_t* src) {
 }
 
 // Convert 32 samples of YUV444 to R/G/B
-static void YUV444ToRGB(const uint8_t* const y,
-                        const uint8_t* const u,
-                        const uint8_t* const v,
-                        __m128i* const R, __m128i* const G, __m128i* const B) {
-  const __m128i Y0 = Load_HI_16(y), U0 = Load_HI_16(u), V0 = Load_HI_16(v);
-  ConvertYUV444ToRGB(&Y0, &U0, &V0, R, G, B);
+static void YUV444ToRGB_SSE2(const uint8_t* const y,
+                             const uint8_t* const u,
+                             const uint8_t* const v,
+                             __m128i* const R, __m128i* const G,
+                             __m128i* const B) {
+  const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_HI_16_SSE2(u),
+                V0 = Load_HI_16_SSE2(v);
+  ConvertYUV444ToRGB_SSE2(&Y0, &U0, &V0, R, G, B);
 }
 
 // Convert 32 samples of YUV420 to R/G/B
-static void YUV420ToRGB(const uint8_t* const y,
-                        const uint8_t* const u,
-                        const uint8_t* const v,
-                        __m128i* const R, __m128i* const G, __m128i* const B) {
-  const __m128i Y0 = Load_HI_16(y), U0 = Load_UV_HI_8(u), V0 = Load_UV_HI_8(v);
-  ConvertYUV444ToRGB(&Y0, &U0, &V0, R, G, B);
+static void YUV420ToRGB_SSE2(const uint8_t* const y,
+                             const uint8_t* const u,
+                             const uint8_t* const v,
+                             __m128i* const R, __m128i* const G,
+                             __m128i* const B) {
+  const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_UV_HI_8_SSE2(u),
+                V0 = Load_UV_HI_8_SSE2(v);
+  ConvertYUV444ToRGB_SSE2(&Y0, &U0, &V0, R, G, B);
 }
 
 // Pack R/G/B/A results into 32b output.
-static WEBP_INLINE void PackAndStore4(const __m128i* const R,
-                                      const __m128i* const G,
-                                      const __m128i* const B,
-                                      const __m128i* const A,
-                                      uint8_t* const dst) {
+static WEBP_INLINE void PackAndStore4_SSE2(const __m128i* const R,
+                                           const __m128i* const G,
+                                           const __m128i* const B,
+                                           const __m128i* const A,
+                                           uint8_t* const dst) {
   const __m128i rb = _mm_packus_epi16(*R, *B);
   const __m128i ga = _mm_packus_epi16(*G, *A);
   const __m128i rg = _mm_unpacklo_epi8(rb, ga);
@@ -114,12 +118,12 @@ static WEBP_INLINE void PackAndStore4(const __m128i* const R,
 }
 
 // Pack R/G/B/A results into 16b output.
-static WEBP_INLINE void PackAndStore4444(const __m128i* const R,
-                                         const __m128i* const G,
-                                         const __m128i* const B,
-                                         const __m128i* const A,
-                                         uint8_t* const dst) {
-#if !defined(WEBP_SWAP_16BIT_CSP)
+static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
+                                              const __m128i* const G,
+                                              const __m128i* const B,
+                                              const __m128i* const A,
+                                              uint8_t* const dst) {
+#if (WEBP_SWAP_16BIT_CSP == 0)
   const __m128i rg0 = _mm_packus_epi16(*R, *G);
   const __m128i ba0 = _mm_packus_epi16(*B, *A);
 #else
@@ -136,10 +140,10 @@ static WEBP_INLINE void PackAndStore4444(const __m128i* const R,
 }
 
 // Pack R/G/B results into 16b output.
-static WEBP_INLINE void PackAndStore565(const __m128i* const R,
-                                        const __m128i* const G,
-                                        const __m128i* const B,
-                                        uint8_t* const dst) {
+static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
+                                             const __m128i* const G,
+                                             const __m128i* const B,
+                                             uint8_t* const dst) {
   const __m128i r0 = _mm_packus_epi16(*R, *R);
   const __m128i g0 = _mm_packus_epi16(*G, *G);
   const __m128i b0 = _mm_packus_epi16(*B, *B);
@@ -149,7 +153,7 @@ static WEBP_INLINE void PackAndStore565(const __m128i* const R,
   const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3);
   const __m128i rg = _mm_or_si128(r1, g1);
   const __m128i gb = _mm_or_si128(g2, b1);
-#if !defined(WEBP_SWAP_16BIT_CSP)
+#if (WEBP_SWAP_16BIT_CSP == 0)
   const __m128i rgb565 = _mm_unpacklo_epi8(rg, gb);
 #else
   const __m128i rgb565 = _mm_unpacklo_epi8(gb, rg);
@@ -160,10 +164,10 @@ static WEBP_INLINE void PackAndStore565(const __m128i* const R,
 // Pack the planar buffers
 // rrrr... rrrr... gggg... gggg... bbbb... bbbb....
 // triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ...
-static WEBP_INLINE void PlanarTo24b(__m128i* const in0, __m128i* const in1,
-                                    __m128i* const in2, __m128i* const in3,
-                                    __m128i* const in4, __m128i* const in5,
-                                    uint8_t* const rgb) {
+static WEBP_INLINE void PlanarTo24b_SSE2(__m128i* const in0, __m128i* const in1,
+                                         __m128i* const in2, __m128i* const in3,
+                                         __m128i* const in4, __m128i* const in5,
+                                         uint8_t* const rgb) {
   // The input is 6 registers of sixteen 8b but for the sake of explanation,
   // let's take 6 registers of four 8b values.
   // To pack, we will keep taking one every two 8b integer and move it
@@ -186,69 +190,69 @@ static WEBP_INLINE void PlanarTo24b(__m128i* const in0, __m128i* const in1,
   _mm_storeu_si128((__m128i*)(rgb + 80), *in5);
 }
 
-void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                    uint8_t* dst) {
+void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                         uint8_t* dst) {
   const __m128i kAlpha = _mm_set1_epi16(255);
   int n;
   for (n = 0; n < 32; n += 8, dst += 32) {
     __m128i R, G, B;
-    YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
-    PackAndStore4(&R, &G, &B, &kAlpha, dst);
+    YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
+    PackAndStore4_SSE2(&R, &G, &B, &kAlpha, dst);
   }
 }
 
-void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                    uint8_t* dst) {
+void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                         uint8_t* dst) {
   const __m128i kAlpha = _mm_set1_epi16(255);
   int n;
   for (n = 0; n < 32; n += 8, dst += 32) {
     __m128i R, G, B;
-    YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
-    PackAndStore4(&B, &G, &R, &kAlpha, dst);
+    YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
+    PackAndStore4_SSE2(&B, &G, &R, &kAlpha, dst);
   }
 }
 
-void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                    uint8_t* dst) {
+void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                         uint8_t* dst) {
   const __m128i kAlpha = _mm_set1_epi16(255);
   int n;
   for (n = 0; n < 32; n += 8, dst += 32) {
     __m128i R, G, B;
-    YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
-    PackAndStore4(&kAlpha, &R, &G, &B, dst);
+    YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
+    PackAndStore4_SSE2(&kAlpha, &R, &G, &B, dst);
   }
 }
 
-void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                        uint8_t* dst) {
+void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
+                             const uint8_t* v, uint8_t* dst) {
   const __m128i kAlpha = _mm_set1_epi16(255);
   int n;
   for (n = 0; n < 32; n += 8, dst += 16) {
     __m128i R, G, B;
-    YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
-    PackAndStore4444(&R, &G, &B, &kAlpha, dst);
+    YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
+    PackAndStore4444_SSE2(&R, &G, &B, &kAlpha, dst);
   }
 }
 
-void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                      uint8_t* dst) {
+void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                           uint8_t* dst) {
   int n;
   for (n = 0; n < 32; n += 8, dst += 16) {
     __m128i R, G, B;
-    YUV444ToRGB(y + n, u + n, v + n, &R, &G, &B);
-    PackAndStore565(&R, &G, &B, dst);
+    YUV444ToRGB_SSE2(y + n, u + n, v + n, &R, &G, &B);
+    PackAndStore565_SSE2(&R, &G, &B, dst);
   }
 }
 
-void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                   uint8_t* dst) {
+void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                        uint8_t* dst) {
   __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
   __m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
 
-  YUV444ToRGB(y + 0, u + 0, v + 0, &R0, &G0, &B0);
-  YUV444ToRGB(y + 8, u + 8, v + 8, &R1, &G1, &B1);
-  YUV444ToRGB(y + 16, u + 16, v + 16, &R2, &G2, &B2);
-  YUV444ToRGB(y + 24, u + 24, v + 24, &R3, &G3, &B3);
+  YUV444ToRGB_SSE2(y + 0, u + 0, v + 0, &R0, &G0, &B0);
+  YUV444ToRGB_SSE2(y + 8, u + 8, v + 8, &R1, &G1, &B1);
+  YUV444ToRGB_SSE2(y + 16, u + 16, v + 16, &R2, &G2, &B2);
+  YUV444ToRGB_SSE2(y + 24, u + 24, v + 24, &R3, &G3, &B3);
 
   // Cast to 8b and store as RRRRGGGGBBBB.
   rgb0 = _mm_packus_epi16(R0, R1);
@@ -259,18 +263,18 @@ void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
   rgb5 = _mm_packus_epi16(B2, B3);
 
   // Pack as RGBRGBRGBRGB.
-  PlanarTo24b(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
+  PlanarTo24b_SSE2(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
 }
 
-void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                   uint8_t* dst) {
+void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                        uint8_t* dst) {
   __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
   __m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
 
-  YUV444ToRGB(y +  0, u +  0, v +  0, &R0, &G0, &B0);
-  YUV444ToRGB(y +  8, u +  8, v +  8, &R1, &G1, &B1);
-  YUV444ToRGB(y + 16, u + 16, v + 16, &R2, &G2, &B2);
-  YUV444ToRGB(y + 24, u + 24, v + 24, &R3, &G3, &B3);
+  YUV444ToRGB_SSE2(y +  0, u +  0, v +  0, &R0, &G0, &B0);
+  YUV444ToRGB_SSE2(y +  8, u +  8, v +  8, &R1, &G1, &B1);
+  YUV444ToRGB_SSE2(y + 16, u + 16, v + 16, &R2, &G2, &B2);
+  YUV444ToRGB_SSE2(y + 24, u + 24, v + 24, &R3, &G3, &B3);
 
   // Cast to 8b and store as BBBBGGGGRRRR.
   bgr0 = _mm_packus_epi16(B0, B1);
@@ -281,20 +285,21 @@ void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
   bgr5= _mm_packus_epi16(R2, R3);
 
   // Pack as BGRBGRBGRBGR.
-  PlanarTo24b(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
+  PlanarTo24b_SSE2(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
 }
 
 //-----------------------------------------------------------------------------
 // Arbitrary-length row conversion functions
 
-static void YuvToRgbaRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                         uint8_t* dst, int len) {
+static void YuvToRgbaRow_SSE2(const uint8_t* y,
+                              const uint8_t* u, const uint8_t* v,
+                              uint8_t* dst, int len) {
   const __m128i kAlpha = _mm_set1_epi16(255);
   int n;
   for (n = 0; n + 8 <= len; n += 8, dst += 32) {
     __m128i R, G, B;
-    YUV420ToRGB(y, u, v, &R, &G, &B);
-    PackAndStore4(&R, &G, &B, &kAlpha, dst);
+    YUV420ToRGB_SSE2(y, u, v, &R, &G, &B);
+    PackAndStore4_SSE2(&R, &G, &B, &kAlpha, dst);
     y += 8;
     u += 4;
     v += 4;
@@ -308,14 +313,15 @@ static void YuvToRgbaRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
   }
 }
 
-static void YuvToBgraRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                         uint8_t* dst, int len) {
+static void YuvToBgraRow_SSE2(const uint8_t* y,
+                              const uint8_t* u, const uint8_t* v,
+                              uint8_t* dst, int len) {
   const __m128i kAlpha = _mm_set1_epi16(255);
   int n;
   for (n = 0; n + 8 <= len; n += 8, dst += 32) {
     __m128i R, G, B;
-    YUV420ToRGB(y, u, v, &R, &G, &B);
-    PackAndStore4(&B, &G, &R, &kAlpha, dst);
+    YUV420ToRGB_SSE2(y, u, v, &R, &G, &B);
+    PackAndStore4_SSE2(&B, &G, &R, &kAlpha, dst);
     y += 8;
     u += 4;
     v += 4;
@@ -329,14 +335,15 @@ static void YuvToBgraRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
   }
 }
 
-static void YuvToArgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                         uint8_t* dst, int len) {
+static void YuvToArgbRow_SSE2(const uint8_t* y,
+                              const uint8_t* u, const uint8_t* v,
+                              uint8_t* dst, int len) {
   const __m128i kAlpha = _mm_set1_epi16(255);
   int n;
   for (n = 0; n + 8 <= len; n += 8, dst += 32) {
     __m128i R, G, B;
-    YUV420ToRGB(y, u, v, &R, &G, &B);
-    PackAndStore4(&kAlpha, &R, &G, &B, dst);
+    YUV420ToRGB_SSE2(y, u, v, &R, &G, &B);
+    PackAndStore4_SSE2(&kAlpha, &R, &G, &B, dst);
     y += 8;
     u += 4;
     v += 4;
@@ -350,17 +357,18 @@ static void YuvToArgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
   }
 }
 
-static void YuvToRgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                        uint8_t* dst, int len) {
+static void YuvToRgbRow_SSE2(const uint8_t* y,
+                             const uint8_t* u, const uint8_t* v,
+                             uint8_t* dst, int len) {
   int n;
   for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
     __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
     __m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
 
-    YUV420ToRGB(y +  0, u +  0, v +  0, &R0, &G0, &B0);
-    YUV420ToRGB(y +  8, u +  4, v +  4, &R1, &G1, &B1);
-    YUV420ToRGB(y + 16, u +  8, v +  8, &R2, &G2, &B2);
-    YUV420ToRGB(y + 24, u + 12, v + 12, &R3, &G3, &B3);
+    YUV420ToRGB_SSE2(y +  0, u +  0, v +  0, &R0, &G0, &B0);
+    YUV420ToRGB_SSE2(y +  8, u +  4, v +  4, &R1, &G1, &B1);
+    YUV420ToRGB_SSE2(y + 16, u +  8, v +  8, &R2, &G2, &B2);
+    YUV420ToRGB_SSE2(y + 24, u + 12, v + 12, &R3, &G3, &B3);
 
     // Cast to 8b and store as RRRRGGGGBBBB.
     rgb0 = _mm_packus_epi16(R0, R1);
@@ -371,7 +379,7 @@ static void YuvToRgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
     rgb5 = _mm_packus_epi16(B2, B3);
 
     // Pack as RGBRGBRGBRGB.
-    PlanarTo24b(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
+    PlanarTo24b_SSE2(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
 
     y += 32;
     u += 16;
@@ -386,17 +394,18 @@ static void YuvToRgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
   }
 }
 
-static void YuvToBgrRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
-                        uint8_t* dst, int len) {
+static void YuvToBgrRow_SSE2(const uint8_t* y,
+                             const uint8_t* u, const uint8_t* v,
+                             uint8_t* dst, int len) {
   int n;
   for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
     __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
     __m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
 
-    YUV420ToRGB(y +  0, u +  0, v +  0, &R0, &G0, &B0);
-    YUV420ToRGB(y +  8, u +  4, v +  4, &R1, &G1, &B1);
-    YUV420ToRGB(y + 16, u +  8, v +  8, &R2, &G2, &B2);
-    YUV420ToRGB(y + 24, u + 12, v + 12, &R3, &G3, &B3);
+    YUV420ToRGB_SSE2(y +  0, u +  0, v +  0, &R0, &G0, &B0);
+    YUV420ToRGB_SSE2(y +  8, u +  4, v +  4, &R1, &G1, &B1);
+    YUV420ToRGB_SSE2(y + 16, u +  8, v +  8, &R2, &G2, &B2);
+    YUV420ToRGB_SSE2(y + 24, u + 12, v + 12, &R3, &G3, &B3);
 
     // Cast to 8b and store as BBBBGGGGRRRR.
     bgr0 = _mm_packus_epi16(B0, B1);
@@ -407,7 +416,7 @@ static void YuvToBgrRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
     bgr5 = _mm_packus_epi16(R2, R3);
 
     // Pack as BGRBGRBGRBGR.
-    PlanarTo24b(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
+    PlanarTo24b_SSE2(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst);
 
     y += 32;
     u += 16;
@@ -428,11 +437,11 @@ static void YuvToBgrRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
 extern void WebPInitSamplersSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE2(void) {
-  WebPSamplers[MODE_RGB]  = YuvToRgbRow;
-  WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
-  WebPSamplers[MODE_BGR]  = YuvToBgrRow;
-  WebPSamplers[MODE_BGRA] = YuvToBgraRow;
-  WebPSamplers[MODE_ARGB] = YuvToArgbRow;
+  WebPSamplers[MODE_RGB]  = YuvToRgbRow_SSE2;
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRow_SSE2;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRow_SSE2;
+  WebPSamplers[MODE_BGRA] = YuvToBgraRow_SSE2;
+  WebPSamplers[MODE_ARGB] = YuvToArgbRow_SSE2;
 }
 
 //------------------------------------------------------------------------------
@@ -445,7 +454,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE2(void) {
 
 // Function that inserts a value of the second half of the in buffer in between
 // every two char of the first half.
-static WEBP_INLINE void RGB24PackedToPlanarHelper(
+static WEBP_INLINE void RGB24PackedToPlanarHelper_SSE2(
     const __m128i* const in /*in[6]*/, __m128i* const out /*out[6]*/) {
   out[0] = _mm_unpacklo_epi8(in[0], in[3]);
   out[1] = _mm_unpackhi_epi8(in[0], in[3]);
@@ -458,8 +467,8 @@ static WEBP_INLINE void RGB24PackedToPlanarHelper(
 // Unpack the 8b input rgbrgbrgbrgb ... as contiguous registers:
 // rrrr... rrrr... gggg... gggg... bbbb... bbbb....
 // Similar to PlanarTo24bHelper(), but in reverse order.
-static WEBP_INLINE void RGB24PackedToPlanar(const uint8_t* const rgb,
-                                            __m128i* const out /*out[6]*/) {
+static WEBP_INLINE void RGB24PackedToPlanar_SSE2(
+    const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
   __m128i tmp[6];
   tmp[0] = _mm_loadu_si128((const __m128i*)(rgb +  0));
   tmp[1] = _mm_loadu_si128((const __m128i*)(rgb + 16));
@@ -468,16 +477,16 @@ static WEBP_INLINE void RGB24PackedToPlanar(const uint8_t* const rgb,
   tmp[4] = _mm_loadu_si128((const __m128i*)(rgb + 64));
   tmp[5] = _mm_loadu_si128((const __m128i*)(rgb + 80));
 
-  RGB24PackedToPlanarHelper(tmp, out);
-  RGB24PackedToPlanarHelper(out, tmp);
-  RGB24PackedToPlanarHelper(tmp, out);
-  RGB24PackedToPlanarHelper(out, tmp);
-  RGB24PackedToPlanarHelper(tmp, out);
+  RGB24PackedToPlanarHelper_SSE2(tmp, out);
+  RGB24PackedToPlanarHelper_SSE2(out, tmp);
+  RGB24PackedToPlanarHelper_SSE2(tmp, out);
+  RGB24PackedToPlanarHelper_SSE2(out, tmp);
+  RGB24PackedToPlanarHelper_SSE2(tmp, out);
 }
 
 // Convert 8 packed ARGB to r[], g[], b[]
-static WEBP_INLINE void RGB32PackedToPlanar(const uint32_t* const argb,
-                                            __m128i* const rgb /*in[6]*/) {
+static WEBP_INLINE void RGB32PackedToPlanar_SSE2(const uint32_t* const argb,
+                                                 __m128i* const rgb /*in[6]*/) {
   const __m128i zero = _mm_setzero_si128();
   __m128i a0 = LOAD_16(argb + 0);
   __m128i a1 = LOAD_16(argb + 4);
@@ -511,10 +520,10 @@ static WEBP_INLINE void RGB32PackedToPlanar(const uint32_t* const argb,
 } while (0)
 
 #define MK_CST_16(A, B) _mm_set_epi16((B), (A), (B), (A), (B), (A), (B), (A))
-static WEBP_INLINE void ConvertRGBToY(const __m128i* const R,
-                                      const __m128i* const G,
-                                      const __m128i* const B,
-                                      __m128i* const Y) {
+static WEBP_INLINE void ConvertRGBToY_SSE2(const __m128i* const R,
+                                           const __m128i* const G,
+                                           const __m128i* const B,
+                                           __m128i* const Y) {
   const __m128i kRG_y = MK_CST_16(16839, 33059 - 16384);
   const __m128i kGB_y = MK_CST_16(16384, 6420);
   const __m128i kHALF_Y = _mm_set1_epi32((16 << YUV_FIX) + YUV_HALF);
@@ -526,10 +535,11 @@ static WEBP_INLINE void ConvertRGBToY(const __m128i* const R,
   TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_y, kGB_y, kHALF_Y, YUV_FIX, *Y);
 }
 
-static WEBP_INLINE void ConvertRGBToUV(const __m128i* const R,
-                                       const __m128i* const G,
-                                       const __m128i* const B,
-                                       __m128i* const U, __m128i* const V) {
+static WEBP_INLINE void ConvertRGBToUV_SSE2(const __m128i* const R,
+                                            const __m128i* const G,
+                                            const __m128i* const B,
+                                            __m128i* const U,
+                                            __m128i* const V) {
   const __m128i kRG_u = MK_CST_16(-9719, -19081);
   const __m128i kGB_u = MK_CST_16(0, 28800);
   const __m128i kRG_v = MK_CST_16(28800, 0);
@@ -549,14 +559,14 @@ static WEBP_INLINE void ConvertRGBToUV(const __m128i* const R,
 #undef MK_CST_16
 #undef TRANSFORM
 
-static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
+static void ConvertRGB24ToY_SSE2(const uint8_t* rgb, uint8_t* y, int width) {
   const int max_width = width & ~31;
   int i;
   for (i = 0; i < max_width; rgb += 3 * 16 * 2) {
     __m128i rgb_plane[6];
     int j;
 
-    RGB24PackedToPlanar(rgb, rgb_plane);
+    RGB24PackedToPlanar_SSE2(rgb, rgb_plane);
 
     for (j = 0; j < 2; ++j, i += 16) {
       const __m128i zero = _mm_setzero_si128();
@@ -566,13 +576,13 @@ static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
       r = _mm_unpacklo_epi8(rgb_plane[0 + j], zero);
       g = _mm_unpacklo_epi8(rgb_plane[2 + j], zero);
       b = _mm_unpacklo_epi8(rgb_plane[4 + j], zero);
-      ConvertRGBToY(&r, &g, &b, &Y0);
+      ConvertRGBToY_SSE2(&r, &g, &b, &Y0);
 
       // Convert to 16-bit Y.
       r = _mm_unpackhi_epi8(rgb_plane[0 + j], zero);
       g = _mm_unpackhi_epi8(rgb_plane[2 + j], zero);
       b = _mm_unpackhi_epi8(rgb_plane[4 + j], zero);
-      ConvertRGBToY(&r, &g, &b, &Y1);
+      ConvertRGBToY_SSE2(&r, &g, &b, &Y1);
 
       // Cast to 8-bit and store.
       STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
@@ -583,14 +593,14 @@ static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
   }
 }
 
-static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
+static void ConvertBGR24ToY_SSE2(const uint8_t* bgr, uint8_t* y, int width) {
   const int max_width = width & ~31;
   int i;
   for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
     __m128i bgr_plane[6];
     int j;
 
-    RGB24PackedToPlanar(bgr, bgr_plane);
+    RGB24PackedToPlanar_SSE2(bgr, bgr_plane);
 
     for (j = 0; j < 2; ++j, i += 16) {
       const __m128i zero = _mm_setzero_si128();
@@ -600,13 +610,13 @@ static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
       b = _mm_unpacklo_epi8(bgr_plane[0 + j], zero);
       g = _mm_unpacklo_epi8(bgr_plane[2 + j], zero);
       r = _mm_unpacklo_epi8(bgr_plane[4 + j], zero);
-      ConvertRGBToY(&r, &g, &b, &Y0);
+      ConvertRGBToY_SSE2(&r, &g, &b, &Y0);
 
       // Convert to 16-bit Y.
       b = _mm_unpackhi_epi8(bgr_plane[0 + j], zero);
       g = _mm_unpackhi_epi8(bgr_plane[2 + j], zero);
       r = _mm_unpackhi_epi8(bgr_plane[4 + j], zero);
-      ConvertRGBToY(&r, &g, &b, &Y1);
+      ConvertRGBToY_SSE2(&r, &g, &b, &Y1);
 
       // Cast to 8-bit and store.
       STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
@@ -617,14 +627,14 @@ static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
   }
 }
 
-static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
+static void ConvertARGBToY_SSE2(const uint32_t* argb, uint8_t* y, int width) {
   const int max_width = width & ~15;
   int i;
   for (i = 0; i < max_width; i += 16) {
     __m128i Y0, Y1, rgb[6];
-    RGB32PackedToPlanar(&argb[i], rgb);
-    ConvertRGBToY(&rgb[0], &rgb[2], &rgb[4], &Y0);
-    ConvertRGBToY(&rgb[1], &rgb[3], &rgb[5], &Y1);
+    RGB32PackedToPlanar_SSE2(&argb[i], rgb);
+    ConvertRGBToY_SSE2(&rgb[0], &rgb[2], &rgb[4], &Y0);
+    ConvertRGBToY_SSE2(&rgb[1], &rgb[3], &rgb[5], &Y1);
     STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
   }
   for (; i < width; ++i) {   // left-over
@@ -636,31 +646,33 @@ static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
 
 // Horizontal add (doubled) of two 16b values, result is 16b.
 // in: A | B | C | D | ... -> out: 2*(A+B) | 2*(C+D) | ...
-static void HorizontalAddPack(const __m128i* const A, const __m128i* const B,
-                              __m128i* const out) {
+static void HorizontalAddPack_SSE2(const __m128i* const A,
+                                   const __m128i* const B,
+                                   __m128i* const out) {
   const __m128i k2 = _mm_set1_epi16(2);
   const __m128i C = _mm_madd_epi16(*A, k2);
   const __m128i D = _mm_madd_epi16(*B, k2);
   *out = _mm_packs_epi32(C, D);
 }
 
-static void ConvertARGBToUV(const uint32_t* argb, uint8_t* u, uint8_t* v,
-                            int src_width, int do_store) {
+static void ConvertARGBToUV_SSE2(const uint32_t* argb,
+                                 uint8_t* u, uint8_t* v,
+                                 int src_width, int do_store) {
   const int max_width = src_width & ~31;
   int i;
   for (i = 0; i < max_width; i += 32, u += 16, v += 16) {
     __m128i rgb[6], U0, V0, U1, V1;
-    RGB32PackedToPlanar(&argb[i], rgb);
-    HorizontalAddPack(&rgb[0], &rgb[1], &rgb[0]);
-    HorizontalAddPack(&rgb[2], &rgb[3], &rgb[2]);
-    HorizontalAddPack(&rgb[4], &rgb[5], &rgb[4]);
-    ConvertRGBToUV(&rgb[0], &rgb[2], &rgb[4], &U0, &V0);
-
-    RGB32PackedToPlanar(&argb[i + 16], rgb);
-    HorizontalAddPack(&rgb[0], &rgb[1], &rgb[0]);
-    HorizontalAddPack(&rgb[2], &rgb[3], &rgb[2]);
-    HorizontalAddPack(&rgb[4], &rgb[5], &rgb[4]);
-    ConvertRGBToUV(&rgb[0], &rgb[2], &rgb[4], &U1, &V1);
+    RGB32PackedToPlanar_SSE2(&argb[i], rgb);
+    HorizontalAddPack_SSE2(&rgb[0], &rgb[1], &rgb[0]);
+    HorizontalAddPack_SSE2(&rgb[2], &rgb[3], &rgb[2]);
+    HorizontalAddPack_SSE2(&rgb[4], &rgb[5], &rgb[4]);
+    ConvertRGBToUV_SSE2(&rgb[0], &rgb[2], &rgb[4], &U0, &V0);
+
+    RGB32PackedToPlanar_SSE2(&argb[i + 16], rgb);
+    HorizontalAddPack_SSE2(&rgb[0], &rgb[1], &rgb[0]);
+    HorizontalAddPack_SSE2(&rgb[2], &rgb[3], &rgb[2]);
+    HorizontalAddPack_SSE2(&rgb[4], &rgb[5], &rgb[4]);
+    ConvertRGBToUV_SSE2(&rgb[0], &rgb[2], &rgb[4], &U1, &V1);
 
     U0 = _mm_packus_epi16(U0, U1);
     V0 = _mm_packus_epi16(V0, V1);
@@ -679,10 +691,9 @@ static void ConvertARGBToUV(const uint32_t* argb, uint8_t* u, uint8_t* v,
 }
 
 // Convert 16 packed ARGB 16b-values to r[], g[], b[]
-static WEBP_INLINE void RGBA32PackedToPlanar_16b(const uint16_t* const rgbx,
-                                                 __m128i* const r,
-                                                 __m128i* const g,
-                                                 __m128i* const b) {
+static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE2(
+    const uint16_t* const rgbx,
+    __m128i* const r, __m128i* const g, __m128i* const b) {
   const __m128i in0 = LOAD_16(rgbx +  0);  // r0 | g0 | b0 |x| r1 | g1 | b1 |x
   const __m128i in1 = LOAD_16(rgbx +  8);  // r2 | g2 | b2 |x| r3 | g3 | b3 |x
   const __m128i in2 = LOAD_16(rgbx + 16);  // r4 | ...
@@ -701,16 +712,16 @@ static WEBP_INLINE void RGBA32PackedToPlanar_16b(const uint16_t* const rgbx,
   *b = _mm_unpacklo_epi64(B1, B3);
 }
 
-static void ConvertRGBA32ToUV(const uint16_t* rgb,
-                              uint8_t* u, uint8_t* v, int width) {
+static void ConvertRGBA32ToUV_SSE2(const uint16_t* rgb,
+                                   uint8_t* u, uint8_t* v, int width) {
   const int max_width = width & ~15;
   const uint16_t* const last_rgb = rgb + 4 * max_width;
   while (rgb < last_rgb) {
     __m128i r, g, b, U0, V0, U1, V1;
-    RGBA32PackedToPlanar_16b(rgb +  0, &r, &g, &b);
-    ConvertRGBToUV(&r, &g, &b, &U0, &V0);
-    RGBA32PackedToPlanar_16b(rgb + 32, &r, &g, &b);
-    ConvertRGBToUV(&r, &g, &b, &U1, &V1);
+    RGBA32PackedToPlanar_16b_SSE2(rgb +  0, &r, &g, &b);
+    ConvertRGBToUV_SSE2(&r, &g, &b, &U0, &V0);
+    RGBA32PackedToPlanar_16b_SSE2(rgb + 32, &r, &g, &b);
+    ConvertRGBToUV_SSE2(&r, &g, &b, &U1, &V1);
     STORE_16(_mm_packus_epi16(U0, U1), u);
     STORE_16(_mm_packus_epi16(V0, V1), v);
     u += 16;
@@ -727,13 +738,13 @@ static void ConvertRGBA32ToUV(const uint16_t* rgb,
 extern void WebPInitConvertARGBToYUVSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
-  WebPConvertARGBToY = ConvertARGBToY;
-  WebPConvertARGBToUV = ConvertARGBToUV;
+  WebPConvertARGBToY = ConvertARGBToY_SSE2;
+  WebPConvertARGBToUV = ConvertARGBToUV_SSE2;
 
-  WebPConvertRGB24ToY = ConvertRGB24ToY;
-  WebPConvertBGR24ToY = ConvertBGR24ToY;
+  WebPConvertRGB24ToY = ConvertRGB24ToY_SSE2;
+  WebPConvertBGR24ToY = ConvertBGR24ToY_SSE2;
 
-  WebPConvertRGBA32ToUV = ConvertRGBA32ToUV;
+  WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
 }
 
 //------------------------------------------------------------------------------
diff --git a/thirdparty/libwebp/enc/alpha_enc.c b/thirdparty/libwebp/src/enc/alpha_enc.c
index 5a2c931f92..7e8d87f22e 100644
--- a/thirdparty/libwebp/enc/alpha_enc.c
+++ b/thirdparty/libwebp/src/enc/alpha_enc.c
@@ -14,12 +14,12 @@
 #include <assert.h>
 #include <stdlib.h>
 
-#include "./vp8i_enc.h"
-#include "../dsp/dsp.h"
-#include "../utils/filters_utils.h"
-#include "../utils/quant_levels_utils.h"
-#include "../utils/utils.h"
-#include "../webp/format_constants.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/filters_utils.h"
+#include "src/utils/quant_levels_utils.h"
+#include "src/utils/utils.h"
+#include "src/webp/format_constants.h"
 
 // -----------------------------------------------------------------------------
 // Encodes the given alpha data via specified compression method 'method'.
@@ -44,11 +44,11 @@
 //           invalid quality or method, or
 //           memory allocation for the compressed data fails.
 
-#include "../enc/vp8li_enc.h"
+#include "src/enc/vp8li_enc.h"
 
 static int EncodeLossless(const uint8_t* const data, int width, int height,
                           int effort_level,  // in [0..6] range
-                          VP8LBitWriter* const bw,
+                          int use_quality_100, VP8LBitWriter* const bw,
                           WebPAuxStats* const stats) {
   int ok = 0;
   WebPConfig config;
@@ -76,7 +76,10 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
   // Set a low default quality for encoding alpha. Ensure that Alpha quality at
   // lower methods (3 and below) is less than the threshold for triggering
   // costly 'BackwardReferencesTraceBackwards'.
-  config.quality = 8.f * effort_level;
+  // If the alpha quality is set to 100 and the method to 6, allow for a high
+  // lossless quality to trigger the cruncher.
+  config.quality =
+      (use_quality_100 && effort_level == 6) ? 100 : 8.f * effort_level;
   assert(config.quality >= 0 && config.quality <= 100.f);
 
   // TODO(urvang): Temporary fix to avoid generating images that trigger
@@ -134,7 +137,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
   if (method != ALPHA_NO_COMPRESSION) {
     ok = VP8LBitWriterInit(&tmp_bw, data_size >> 3);
     ok = ok && EncodeLossless(alpha_src, width, height, effort_level,
-                              &tmp_bw, &result->stats);
+                              !reduce_levels, &tmp_bw, &result->stats);
     if (ok) {
       output = VP8LBitWriterFinish(&tmp_bw);
       output_size = VP8LBitWriterNumBytes(&tmp_bw);
@@ -264,6 +267,7 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height,
                              reduce_levels, effort_level, NULL, &best);
   }
   if (ok) {
+#if !defined(WEBP_DISABLE_STATS)
     if (stats != NULL) {
       stats->lossless_features = best.stats.lossless_features;
       stats->histogram_bits = best.stats.histogram_bits;
@@ -274,6 +278,9 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height,
       stats->lossless_hdr_size = best.stats.lossless_hdr_size;
       stats->lossless_data_size = best.stats.lossless_data_size;
     }
+#else
+    (void)stats;
+#endif
     *output_size = VP8BitWriterSize(&best.bw);
     *output = VP8BitWriterBuf(&best.bw);
   } else {
@@ -339,10 +346,12 @@ static int EncodeAlpha(VP8Encoder* const enc,
     ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method,
                                filter, reduce_levels, effort_level, output,
                                output_size, pic->stats);
+#if !defined(WEBP_DISABLE_STATS)
     if (pic->stats != NULL) {  // need stats?
       pic->stats->coded_size += (int)(*output_size);
       enc->sse_[3] = sse;
     }
+#endif
   }
 
   WebPSafeFree(quant_alpha);
diff --git a/thirdparty/libwebp/enc/analysis_enc.c b/thirdparty/libwebp/src/enc/analysis_enc.c
index dce159b316..08f471f5f8 100644
--- a/thirdparty/libwebp/enc/analysis_enc.c
+++ b/thirdparty/libwebp/src/enc/analysis_enc.c
@@ -15,9 +15,9 @@
 #include <string.h>
 #include <assert.h>
 
-#include "./vp8i_enc.h"
-#include "./cost_enc.h"
-#include "../utils/utils.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/enc/cost_enc.h"
+#include "src/utils/utils.h"
 
 #define MAX_ITERS_K_MEANS  6
 
diff --git a/thirdparty/libwebp/src/enc/backward_references_cost_enc.c b/thirdparty/libwebp/src/enc/backward_references_cost_enc.c
new file mode 100644
index 0000000000..7175496c7f
--- /dev/null
+++ b/thirdparty/libwebp/src/enc/backward_references_cost_enc.c
@@ -0,0 +1,790 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Improves a given set of backward references by analyzing its bit cost.
+// The algorithm is similar to the Zopfli compression algorithm but tailored to
+// images.
+//
+// Author: Vincent Rabaud (vrabaud@google.com)
+//
+
+#include <assert.h>
+
+#include "src/enc/backward_references_enc.h"
+#include "src/enc/histogram_enc.h"
+#include "src/dsp/lossless_common.h"
+#include "src/utils/color_cache_utils.h"
+#include "src/utils/utils.h"
+
+#define VALUES_IN_BYTE 256
+
+extern void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
+extern int VP8LDistanceToPlaneCode(int xsize, int dist);
+extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
+                                      const PixOrCopy v);
+
+typedef struct {  // Estimated bit cost of each symbol, per symbol family.
+  double alpha_[VALUES_IN_BYTE];         // indexed by bits 24..31 of a pixel
+  double red_[VALUES_IN_BYTE];           // indexed by bits 16..23 of a pixel
+  double blue_[VALUES_IN_BYTE];          // indexed by bits 0..7 of a pixel
+  double distance_[NUM_DISTANCE_CODES];  // indexed by distance prefix code
+  double* literal_;  // bits 8..15 values, then length codes, then cache indices
+} CostModel;
+
+// Bit cost estimate per symbol: log2(total) - log2(count), or 0 if <= 1 used.
+static void ConvertPopulationCountTableToBitEstimates(
+    int num_symbols, const uint32_t population_counts[], double output[]) {
+  uint32_t total = 0;
+  int used = 0;
+  int s;
+  for (s = 0; s < num_symbols; ++s) {
+    const uint32_t count = population_counts[s];
+    total += count;
+    used += (count > 0);
+  }
+  if (used > 1) {
+    const double log_total = VP8LFastLog2(total);
+    for (s = 0; s < num_symbols; ++s) {
+      output[s] = log_total - VP8LFastLog2(population_counts[s]);
+    }
+    return;
+  }
+  memset(output, 0, num_symbols * sizeof(*output));
+}
+
+// Fills 'm' with per-symbol bit estimates derived from the histogram of 'refs'.
+static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
+                          const VP8LBackwardRefs* const refs) {
+  int ok = 0;
+  VP8LRefsCursor cursor = VP8LRefsCursorInit(refs);
+  VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
+  if (histo == NULL) goto Error;
+
+  // Same idea as VP8LHistogramCreate, except that distances are converted to
+  // plane codes while being accumulated.
+  VP8LHistogramInit(histo, cache_bits);
+  for (; VP8LRefsCursorOk(&cursor); VP8LRefsCursorNext(&cursor)) {
+    VP8LHistogramAddSinglePixOrCopy(histo, cursor.cur_pos,
+                                    VP8LDistanceToPlaneCode, xsize);
+  }
+
+  ConvertPopulationCountTableToBitEstimates(
+      VP8LHistogramNumCodes(histo->palette_code_bits_),
+      histo->literal_, m->literal_);
+  ConvertPopulationCountTableToBitEstimates(VALUES_IN_BYTE, histo->red_,
+                                            m->red_);
+  ConvertPopulationCountTableToBitEstimates(VALUES_IN_BYTE, histo->blue_,
+                                            m->blue_);
+  ConvertPopulationCountTableToBitEstimates(VALUES_IN_BYTE, histo->alpha_,
+                                            m->alpha_);
+  ConvertPopulationCountTableToBitEstimates(NUM_DISTANCE_CODES,
+                                            histo->distance_, m->distance_);
+  ok = 1;
+
+ Error:
+  VP8LFreeHistogram(histo);
+  return ok;
+}
+
+static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
+  const double a_cost = m->alpha_[v >> 24];
+  const double r_cost = m->red_[(v >> 16) & 0xff];
+  const double g_cost = m->literal_[(v >> 8) & 0xff];  // start of literal_
+  return a_cost + r_cost + g_cost + m->blue_[v & 0xff];
+}
+
+static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
+  // Color-cache entries are stored after the literal and length-code entries.
+  return m->literal_[(int)(VALUES_IN_BYTE + NUM_LENGTH_CODES + idx)];
+}
+
+static WEBP_INLINE double GetLengthCost(const CostModel* const m,
+                                        uint32_t length) {
+  int prefix, extra;  // both filled in by VP8LPrefixEncodeBits()
+  VP8LPrefixEncodeBits(length, &prefix, &extra);
+  return extra + m->literal_[VALUES_IN_BYTE + prefix];
+}
+
+static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
+                                          uint32_t distance) {
+  int prefix, extra;  // both filled in by VP8LPrefixEncodeBits()
+  VP8LPrefixEncodeBits(distance, &prefix, &extra);
+  return extra + m->distance_[prefix];
+}
+
+static WEBP_INLINE void AddSingleLiteralWithCostModel(
+    const uint32_t* const argb, VP8LColorCache* const hashers,
+    const CostModel* const cost_model, int idx, int use_color_cache,
+    float prev_cost, float* const cost, uint16_t* const dist_array) {
+  // Weights applied to the cache-hit and the plain-literal estimates.
+  const double kCacheWeight = 0.68, kLiteralWeight = 0.82;
+  const uint32_t pixel = argb[idx];
+  double total = prev_cost;
+  int cache_idx = -1;
+  if (use_color_cache) cache_idx = VP8LColorCacheContains(hashers, pixel);
+  if (cache_idx < 0) {
+    if (use_color_cache) VP8LColorCacheInsert(hashers, pixel);
+    total += GetLiteralCost(cost_model, pixel) * kLiteralWeight;
+  } else {
+    total += GetCacheCost(cost_model, cache_idx) * kCacheWeight;
+  }
+  if (total < cost[idx]) {
+    cost[idx] = (float)total;
+    dist_array[idx] = 1;  // a literal covers exactly one pixel
+  }
+}
+
+// -----------------------------------------------------------------------------
+// CostManager and interval handling
+
+// Empirical value to avoid high memory consumption but good for performance.
+#define COST_CACHE_INTERVAL_SIZE_MAX 500
+
+// To perform backward reference every pixel at index index_ is considered and
+// the cost for the MAX_LENGTH following pixels computed. Those following pixels
+// at index index_ + k (k from 0 to MAX_LENGTH) have a cost of:
+//     cost_ = distance cost at index + GetLengthCost(cost_model, k)
+// and the minimum value is kept. GetLengthCost(cost_model, k) is cached in an
+// array of size MAX_LENGTH.
+// Instead of performing MAX_LENGTH comparisons per pixel, we keep track of the
+// minimal values using intervals of constant cost.
+// An interval is defined by the index_ of the pixel that generated it and
+// is only useful in a range of indices from start_ to end_ (exclusive), i.e.
+// it contains the minimum value for pixels between start_ and end_.
+// Intervals are stored in a linked list and ordered by start_. When a new
+// interval has a better value, old intervals are split or removed. There are
+// therefore no overlapping intervals.
+typedef struct CostInterval CostInterval;
+struct CostInterval {
+  float cost_;              // constant cost over [start_, end_)
+  int start_;               // first pixel index covered
+  int end_;                 // one past the last pixel index covered
+  int index_;               // index of the pixel that generated the interval
+  CostInterval* previous_;  // neighbors in the list ordered by start_
+  CostInterval* next_;
+};
+
+// A run of consecutive lengths k whose GetLengthCost(cost_model, k) is equal.
+typedef struct {
+  double cost_;   // The GetLengthCost() value shared by the whole run.
+  int start_;     // First length index k of the run.
+  int end_;       // Exclusive.
+} CostCacheInterval;
+
+// This structure is in charge of managing intervals and costs.
+// It caches the different CostCacheInterval, caches the different
+// GetLengthCost(cost_model, k) in cost_cache_ and the CostInterval's (whose
+// count_ is limited by COST_CACHE_INTERVAL_SIZE_MAX).
+#define COST_MANAGER_MAX_FREE_LIST 10
+typedef struct {
+  CostInterval* head_;  // First active interval (list ordered by start_).
+  int count_;  // The number of stored intervals.
+  CostCacheInterval* cache_intervals_;  // Runs of equal cost_cache_ values.
+  size_t cache_intervals_size_;         // Number of entries in the above.
+  double cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k).
+  float* costs_;          // Best cost found so far, one entry per pixel.
+  uint16_t* dist_array_;  // Caller's buffer; never freed by the manager.
+  // Most of the time, we only need few intervals -> use a free-list, to avoid
+  // fragmentation with small allocs in most common cases.
+  CostInterval intervals_[COST_MANAGER_MAX_FREE_LIST];
+  CostInterval* free_intervals_;  // Unused slots of intervals_.
+  // These are regularly malloc'd remains. This list can't grow larger than
+  // size COST_CACHE_INTERVAL_SIZE_MAX - COST_MANAGER_MAX_FREE_LIST, note.
+  CostInterval* recycled_intervals_;
+} CostManager;
+
+static void CostIntervalAddToFreeList(CostManager* const manager,
+                                      CostInterval* const interval) {
+  interval->next_ = manager->free_intervals_;  // push at the front: the
+  manager->free_intervals_ = interval;         // free list works as a stack
+}
+
+static int CostIntervalIsInFreeList(const CostManager* const manager,
+                                    const CostInterval* const interval) {
+  const CostInterval* const pool = manager->intervals_;  // embedded storage
+  return !(interval < pool || interval > pool + COST_MANAGER_MAX_FREE_LIST - 1);
+}
+
+static void CostManagerInitFreeList(CostManager* const manager) {
+  // Rebuild the free list from scratch with every slot of the embedded pool.
+  CostInterval* slot = manager->intervals_;
+  CostInterval* const slot_end = slot + COST_MANAGER_MAX_FREE_LIST;
+  manager->free_intervals_ = NULL;
+  for (; slot != slot_end; ++slot) CostIntervalAddToFreeList(manager, slot);
+}
+
+static void DeleteIntervalList(CostManager* const manager,
+                               const CostInterval* interval) {
+  const CostInterval* following;
+  for (; interval != NULL; interval = following) {
+    const int is_pooled = CostIntervalIsInFreeList(manager, interval);
+    following = interval->next_;  // read the link before a possible free
+    // Pool slots live inside 'manager'; only the other nodes are freed.
+    if (!is_pooled) WebPSafeFree((void*)interval);
+  }
+}
+
+static void CostManagerClear(CostManager* const manager) {
+  if (manager == NULL) return;  // nothing to release
+
+  WebPSafeFree(manager->costs_);
+  WebPSafeFree(manager->cache_intervals_);  // dist_array_ is left untouched
+
+  // Free the nodes of both lists; slots of the embedded pool are skipped.
+  DeleteIntervalList(manager, manager->head_);
+  manager->head_ = NULL;
+  DeleteIntervalList(manager, manager->recycled_intervals_);
+  manager->recycled_intervals_ = NULL;
+
+  // Zero every field, then rebuild the free list that the memset wiped.
+  memset(manager, 0, sizeof(*manager));
+  CostManagerInitFreeList(manager);
+}
+
+static int CostManagerInit(CostManager* const manager,
+                           uint16_t* const dist_array, int pix_count,
+                           const CostModel* const cost_model) {  // 1 if ok
+  int i;
+  const int cost_cache_size = (pix_count > MAX_LENGTH) ? MAX_LENGTH : pix_count;
+
+  manager->costs_ = NULL;
+  manager->cache_intervals_ = NULL;
+  manager->head_ = NULL;
+  manager->recycled_intervals_ = NULL;
+  manager->count_ = 0;
+  manager->dist_array_ = dist_array;
+  CostManagerInitFreeList(manager);
+
+  // Fill in the cost_cache_ and count the runs of equal consecutive values.
+  manager->cache_intervals_size_ = 1;
+  manager->cost_cache_[0] = GetLengthCost(cost_model, 0);
+  for (i = 1; i < cost_cache_size; ++i) {
+    manager->cost_cache_[i] = GetLengthCost(cost_model, i);
+    // Get the number of bound intervals.
+    if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) {
+      ++manager->cache_intervals_size_;
+    }
+  }
+
+  // With the current cost model, we usually have below 20 intervals.
+  // The worst case scenario with a cost model would be if every length has a
+  // different cost, hence MAX_LENGTH but that is impossible with the current
+  // implementation that spirals around a pixel.
+  assert(manager->cache_intervals_size_ <= MAX_LENGTH);
+  manager->cache_intervals_ = (CostCacheInterval*)WebPSafeMalloc(
+      manager->cache_intervals_size_, sizeof(*manager->cache_intervals_));
+  if (manager->cache_intervals_ == NULL) {
+    CostManagerClear(manager);
+    return 0;
+  }
+
+  // Fill in the cache_intervals_.
+  {
+    CostCacheInterval* cur = manager->cache_intervals_;
+
+    // Consecutive values in cost_cache_ are compared and, as soon as they
+    // differ, a new interval is created and bounded.
+    cur->start_ = 0;
+    cur->end_ = 1;
+    cur->cost_ = manager->cost_cache_[0];
+    for (i = 1; i < cost_cache_size; ++i) {
+      const double cost_val = manager->cost_cache_[i];
+      if (cost_val != cur->cost_) {
+        ++cur;
+        // Initialize an interval.
+        cur->start_ = i;
+        cur->cost_ = cost_val;
+      }
+      cur->end_ = i + 1;  // the current run now extends through index i
+    }
+  }
+
+  manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
+  if (manager->costs_ == NULL) {
+    CostManagerClear(manager);
+    return 0;
+  }
+  // Set the initial costs_ high for every pixel as we will keep the minimum.
+  for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f;
+
+  return 1;
+}
+
+// Lowers the stored cost of pixel 'i' to 'cost' when that is an improvement,
+// recording the length of the span that starts at 'position' and ends at 'i'.
+static WEBP_INLINE void UpdateCost(CostManager* const manager, int i,
+                                   int position, float cost) {
+  const int length = i - position + 1;
+  assert(length >= 1 && length <= MAX_LENGTH);
+
+  if (cost < manager->costs_[i]) {
+    manager->costs_[i] = cost;
+    manager->dist_array_[i] = length;
+  }
+}
+
+// Applies UpdateCost() with the given 'position' and 'cost' to every pixel of
+// the half-open range ['start', 'end').
+static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager,
+                                              int start, int end, int position,
+                                              float cost) {
+  int pixel = start;
+  while (pixel < end) UpdateCost(manager, pixel++, position, cost);
+}
+
+// Links 'prev' and 'next' as neighbors. A NULL 'prev' makes 'next' the list
+// head of 'manager'; a NULL 'next' leaves 'prev' without a successor.
+static WEBP_INLINE void ConnectIntervals(CostManager* const manager,
+                                         CostInterval* const prev,
+                                         CostInterval* const next) {
+  // The forward link lives either in 'prev' or, at the front, in the manager.
+  CostInterval** const forward_link =
+      (prev == NULL) ? &manager->head_ : &prev->next_;
+  *forward_link = next;
+
+  if (next != NULL) next->previous_ = prev;
+}
+
+// Unlinks 'interval' from the active list and keeps its storage for reuse.
+static WEBP_INLINE void PopInterval(CostManager* const manager,
+                                    CostInterval* const interval) {
+  CostInterval** reuse_list;
+  if (interval == NULL) return;
+
+  ConnectIntervals(manager, interval->previous_, interval->next_);
+  // Pool slots return to the free list, malloc'd nodes to the recycled list.
+  reuse_list = CostIntervalIsInFreeList(manager, interval) ?
+      &manager->free_intervals_ : &manager->recycled_intervals_;
+  interval->next_ = *reuse_list;
+  *reuse_list = interval;
+  manager->count_ -= 1;
+  assert(manager->count_ >= 0);
+}
+
+// Folds into the cost of pixel 'i' every stored interval that covers it. The
+// walk stops at the first interval starting after 'i' (the list is ordered by
+// start_). When 'do_clean_intervals' is non-zero, intervals that end at or
+// before 'i' are popped along the way.
+static WEBP_INLINE void UpdateCostAtIndex(CostManager* const manager, int i,
+                                          int do_clean_intervals) {
+  CostInterval* node = manager->head_;
+  CostInterval* following;
+
+  for (; node != NULL && node->start_ <= i; node = following) {
+    following = node->next_;  // saved first: PopInterval() rewrites next_
+    if (node->end_ > i) {
+      // The interval covers 'i': it competes for the minimum.
+      UpdateCost(manager, i, node->index_, node->cost_);
+    } else if (do_clean_intervals) {
+      // Outdated interval: it ends at or before 'i', so it is removed
+      // from the list.
+      PopInterval(manager, node);
+    }
+  }
+}
+
+// Links the orphan interval 'current' into the manager's list so that the
+// start_ ordering holds. 'previous' is only a search hint (NULL means: start
+// from the head); the search walks backwards, then forwards, from it.
+static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager,
+                                               CostInterval* const current,
+                                               CostInterval* previous) {
+  assert(current != NULL);
+
+  if (previous == NULL) previous = manager->head_;  // no hint given
+  while (previous != NULL && current->start_ < previous->start_) {
+    previous = previous->previous_;  // hint starts too late: go backwards
+  }
+  while (previous != NULL && previous->next_ != NULL &&
+         previous->next_->start_ < current->start_) {
+    previous = previous->next_;  // successor still starts earlier: go forward
+  }
+
+  if (previous != NULL) {
+    ConnectIntervals(manager, current, previous->next_);
+  } else {  // 'current' goes in front of the present head
+    ConnectIntervals(manager, current, manager->head_);
+  }
+  ConnectIntervals(manager, previous, current);
+}
+
+// Stores the interval [start, end) with the given 'cost' and 'position' in the
+// manager's list (sorted by start_), using interval_in as a position hint.
+static WEBP_INLINE void InsertInterval(CostManager* const manager,
+                                       CostInterval* const interval_in,
+                                       float cost, int position, int start,
+                                       int end) {
+  CostInterval* interval_new;
+
+  if (start >= end) return;  // empty range: nothing to store
+  if (manager->count_ >= COST_CACHE_INTERVAL_SIZE_MAX) {
+    // Serialize the interval if we cannot store it.
+    UpdateCostPerInterval(manager, start, end, position, cost);
+    return;
+  }
+  if (manager->free_intervals_ != NULL) {  // 1st choice: embedded pool slot
+    interval_new = manager->free_intervals_;
+    manager->free_intervals_ = interval_new->next_;
+  } else if (manager->recycled_intervals_ != NULL) {  // 2nd: recycled node
+    interval_new = manager->recycled_intervals_;
+    manager->recycled_intervals_ = interval_new->next_;
+  } else {  // malloc for good
+    interval_new = (CostInterval*)WebPSafeMalloc(1, sizeof(*interval_new));
+    if (interval_new == NULL) {
+      // Write down the interval if we cannot create it.
+      UpdateCostPerInterval(manager, start, end, position, cost);
+      return;
+    }
+  }
+
+  interval_new->cost_ = cost;
+  interval_new->index_ = position;
+  interval_new->start_ = start;
+  interval_new->end_ = end;
+  PositionOrphanInterval(manager, interval_new, interval_in);
+
+  ++manager->count_;
+}
+
+// Given a new cost interval defined by its start at position, its length value
+// and distance_cost, add its contributions to the previous intervals and costs.
+// If handling the interval or one of its subintervals becomes too heavy, its
+// contribution is added to the costs right away.
+static WEBP_INLINE void PushInterval(CostManager* const manager,
+                                     double distance_cost, int position,
+                                     int len) {
+  size_t i;
+  CostInterval* interval = manager->head_;
+  CostInterval* interval_next;
+  const CostCacheInterval* const cost_cache_intervals =
+      manager->cache_intervals_;
+  // If the interval is small enough, no need to deal with the heavy
+  // interval logic, just serialize it right away. This constant is empirical.
+  const int kSkipDistance = 10;
+
+  if (len < kSkipDistance) {
+    int j;
+    for (j = position; j < position + len; ++j) {
+      const int k = j - position;
+      float cost_tmp;
+      assert(k >= 0 && k < MAX_LENGTH);
+      cost_tmp = (float)(distance_cost + manager->cost_cache_[k]);
+
+      if (manager->costs_[j] > cost_tmp) {
+        manager->costs_[j] = cost_tmp;
+        manager->dist_array_[j] = k + 1;
+      }
+    }
+    return;
+  }
+
+  for (i = 0; i < manager->cache_intervals_size_ &&
+              cost_cache_intervals[i].start_ < len;
+       ++i) {
+    // Define the intersection of the ith interval with the new one.
+    int start = position + cost_cache_intervals[i].start_;
+    const int end = position + (cost_cache_intervals[i].end_ > len
+                                 ? len
+                                 : cost_cache_intervals[i].end_);
+    const float cost = (float)(distance_cost + cost_cache_intervals[i].cost_);
+
+    for (; interval != NULL && interval->start_ < end;
+         interval = interval_next) {
+      interval_next = interval->next_;
+
+      // Make sure we have some overlap
+      if (start >= interval->end_) continue;
+
+      if (cost >= interval->cost_) {
+        // When intervals are represented, the lower, the better.
+        // [**********************************************************[
+        // start                                                    end
+        //                   [----------------------------------[
+        //                   interval->start_       interval->end_
+        // If we are worse than what we already have, add whatever we have so
+        // far up to interval.
+        const int start_new = interval->end_;
+        InsertInterval(manager, interval, cost, position, start,
+                       interval->start_);
+        start = start_new;
+        if (start >= end) break;
+        continue;
+      }
+
+      if (start <= interval->start_) {
+        if (interval->end_ <= end) {
+          //                   [----------------------------------[
+          //                   interval->start_       interval->end_
+          // [**************************************************************[
+          // start                                                        end
+          // We can safely remove the old interval as it is fully included.
+          PopInterval(manager, interval);
+        } else {
+          //              [------------------------------------[
+          //              interval->start_        interval->end_
+          // [*****************************[
+          // start                       end
+          interval->start_ = end;
+          break;
+        }
+      } else {
+        if (end < interval->end_) {
+          // [--------------------------------------------------------------[
+          // interval->start_                                  interval->end_
+          //                     [*****************************[
+          //                     start                       end
+          // We have to split the old interval as it fully contains the new one.
+          const int end_original = interval->end_;
+          interval->end_ = start;
+          InsertInterval(manager, interval, interval->cost_, interval->index_,
+                         end, end_original);
+          interval = interval->next_;
+          break;
+        } else {
+          // [------------------------------------[
+          // interval->start_        interval->end_
+          //                     [*****************************[
+          //                     start                       end
+          interval->end_ = start;
+        }
+      }
+    }
+    // Insert the remaining interval from start to end.
+    InsertInterval(manager, interval, cost, position, start, end);
+  }
+}
+
+// Computes, for every pixel, the length of the cheapest literal/copy ending
+// there and stores it in 'dist_array' (1 means literal). Returns 0 on memory
+// failure or if 'refs' carries an error, 1 otherwise.
+static int BackwardReferencesHashChainDistanceOnly(
+    int xsize, int ysize, const uint32_t* const argb, int cache_bits,
+    const VP8LHashChain* const hash_chain, const VP8LBackwardRefs* const refs,
+    uint16_t* const dist_array) {
+  int i;
+  int ok = 0, cc_init = 0;
+  const int pix_count = xsize * ysize;
+  const int use_color_cache = (cache_bits > 0);
+  const size_t literal_array_size =
+      sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES +
+                        ((cache_bits > 0) ? (1 << cache_bits) : 0));
+  const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
+  CostModel* const cost_model =
+      (CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
+  VP8LColorCache hashers;
+  // Zero-filled so that CostManagerClear() is safe on every early exit.
+  CostManager* cost_manager =
+      (CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager));
+  int offset_prev = -1, len_prev = -1;
+  double offset_cost = -1;
+  int first_offset_is_constant = -1;  // initialized with 'impossible' value
+  int reach = 0;
+
+  if (cost_model == NULL || cost_manager == NULL) goto Error;
+
+  cost_model->literal_ = (double*)(cost_model + 1);
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+
+  if (!CostModelBuild(cost_model, xsize, cache_bits, refs)) goto Error;
+
+  if (!CostManagerInit(cost_manager, dist_array, pix_count, cost_model)) {
+    goto Error;
+  }
+
+  // We loop one pixel at a time, but store all currently best points to
+  // non-processed locations from this point.
+  dist_array[0] = 0;
+  // Add first pixel as literal.
+  AddSingleLiteralWithCostModel(argb, &hashers, cost_model, 0, use_color_cache,
+                                0.f, cost_manager->costs_, dist_array);
+
+  for (i = 1; i < pix_count; ++i) {
+    const float prev_cost = cost_manager->costs_[i - 1];
+    int offset, len;
+    VP8LHashChainFindCopy(hash_chain, i, &offset, &len);
+
+    // Try adding the pixel as a literal.
+    AddSingleLiteralWithCostModel(argb, &hashers, cost_model, i,
+                                  use_color_cache, prev_cost,
+                                  cost_manager->costs_, dist_array);
+
+    // If we are dealing with a non-literal.
+    if (len >= 2) {
+      if (offset != offset_prev) {
+        const int code = VP8LDistanceToPlaneCode(xsize, offset);
+        offset_cost = GetDistanceCost(cost_model, code);
+        first_offset_is_constant = 1;
+        PushInterval(cost_manager, prev_cost + offset_cost, i, len);
+      } else {
+        assert(offset_cost >= 0);
+        assert(len_prev >= 0);
+        assert(first_offset_is_constant == 0 || first_offset_is_constant == 1);
+        // Instead of considering all contributions from a pixel i by calling:
+        //         PushInterval(cost_manager, prev_cost + offset_cost, i, len);
+        // we optimize these contributions in case offset_cost stays the same
+        // for consecutive pixels. This describes a set of pixels similar to a
+        // previous set (e.g. constant color regions).
+        if (first_offset_is_constant) {
+          reach = i - 1 + len_prev - 1;
+          first_offset_is_constant = 0;
+        }
+
+        if (i + len - 1 > reach) {
+          // We can only go further with the same offset if the previous
+          // length was maxed, hence len_prev == len == MAX_LENGTH.
+          // TODO(vrabaud), bump i to the end right away (insert cache and
+          // update cost).
+          // TODO(vrabaud), check if one of the points in between does not have
+          // a lower cost.
+          // Already consider the pixel at "reach" to add intervals that are
+          // better than whatever we add.
+          int j, offset_j, len_j = 0;
+          assert(len == MAX_LENGTH || len == pix_count - i);
+          // Figure out the last consecutive pixel within [i, reach + 1] with
+          // the same offset.
+          for (j = i; j <= reach; ++j) {
+            VP8LHashChainFindCopy(hash_chain, j + 1, &offset_j, &len_j);
+            if (offset_j != offset) {
+              VP8LHashChainFindCopy(hash_chain, j, &offset_j, &len_j);
+              break;
+            }
+          }
+          // Update the cost at j - 1 and j.
+          UpdateCostAtIndex(cost_manager, j - 1, 0);
+          UpdateCostAtIndex(cost_manager, j, 0);
+
+          PushInterval(cost_manager, cost_manager->costs_[j - 1] + offset_cost,
+                       j, len_j);
+          reach = j + len_j - 1;
+        }
+      }
+    }
+
+    UpdateCostAtIndex(cost_manager, i, 1);
+    offset_prev = offset;
+    len_prev = len;
+  }
+
+  ok = !refs->error_;
+Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  CostManagerClear(cost_manager);
+  WebPSafeFree(cost_model);
+  WebPSafeFree(cost_manager);
+  return ok;
+}
+
+// Follows the steps stored in 'dist_array' from its last entry to its first
+// and packs them, in order, at the end of the array ('chosen_path'). Example:
+// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
+static void TraceBackwards(uint16_t* const dist_array,
+                           int dist_array_size,
+                           uint16_t** const chosen_path,
+                           int* const chosen_path_size) {
+  uint16_t* path = dist_array + dist_array_size;
+  // Index-based: stepping below 0 never forms an out-of-bounds pointer.
+  int cur = dist_array_size - 1;
+  while (cur >= 0) {
+    const int k = dist_array[cur];
+    *--path = (uint16_t)k;
+    cur -= k;
+  }
+  *chosen_path = path;
+  *chosen_path_size = (int)(dist_array + dist_array_size - path);
+}
+
+static int BackwardReferencesHashChainFollowChosenPath(
+    const uint32_t* const argb, int cache_bits,
+    const uint16_t* const chosen_path, int chosen_path_size,
+    const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) {
+  const int use_color_cache = (cache_bits > 0);
+  int step;
+  int pos = 0;  // index of the next pixel to emit
+  int ok = 0;
+  int cc_init = 0;
+  VP8LColorCache hashers;
+
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+
+  VP8LClearBackwardRefs(refs);
+  for (step = 0; step < chosen_path_size; ++step) {
+    const int len = chosen_path[step];
+    if (len == 1) {
+      // Single pixel: a color-cache index when cached, a literal otherwise.
+      const uint32_t pixel = argb[pos];
+      const int cache_idx =
+          use_color_cache ? VP8LColorCacheContains(&hashers, pixel) : -1;
+      PixOrCopy v;
+      if (cache_idx < 0) {
+        if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
+        v = PixOrCopyCreateLiteral(pixel);
+      } else {
+        v = PixOrCopyCreateCacheIdx(cache_idx);
+      }
+      VP8LBackwardRefsCursorAdd(refs, v);
+      ++pos;
+    } else {
+      // Copy of 'len' pixels using the offset the hash chain holds for 'pos'.
+      const int offset = VP8LHashChainFindOffset(hash_chain, pos);
+      const int copy_end = pos + len;
+      VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
+      if (use_color_cache) {
+        int k;
+        for (k = 0; k < len; ++k) VP8LColorCacheInsert(&hashers, argb[pos + k]);
+      }
+      pos = copy_end;
+    }
+  }
+  ok = !refs->error_;
+ Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  return ok;
+}
+
+// Returns 1 on success, 0 if an allocation or any of the stages fails.
+extern int VP8LBackwardReferencesTraceBackwards(
+    int xsize, int ysize, const uint32_t* const argb, int cache_bits,
+    const VP8LHashChain* const hash_chain,
+    const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst);
+int VP8LBackwardReferencesTraceBackwards(int xsize, int ysize,
+                                         const uint32_t* const argb,
+                                         int cache_bits,
+                                         const VP8LHashChain* const hash_chain,
+                                         const VP8LBackwardRefs* const refs_src,
+                                         VP8LBackwardRefs* const refs_dst) {
+  const int dist_array_size = xsize * ysize;
+  uint16_t* const dist_array =
+      (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));
+  uint16_t* chosen_path = NULL;
+  int chosen_path_size = 0;
+  int ok = 0;
+
+  // Stage 1: best step length per pixel. Stage 2: extract the chosen path.
+  // Stage 3: re-emit the references of that path into 'refs_dst'.
+  if (dist_array != NULL &&
+      BackwardReferencesHashChainDistanceOnly(xsize, ysize, argb, cache_bits,
+                                              hash_chain, refs_src,
+                                              dist_array)) {
+    TraceBackwards(dist_array, dist_array_size, &chosen_path,
+                   &chosen_path_size);
+    if (BackwardReferencesHashChainFollowChosenPath(
+            argb, cache_bits, chosen_path, chosen_path_size, hash_chain,
+            refs_dst)) {
+      ok = 1;
+    }
+  }
+  WebPSafeFree(dist_array);
+  return ok;
+}
diff --git a/thirdparty/libwebp/src/enc/backward_references_enc.c b/thirdparty/libwebp/src/enc/backward_references_enc.c
new file mode 100644
index 0000000000..39230188b9
--- /dev/null
+++ b/thirdparty/libwebp/src/enc/backward_references_enc.c
@@ -0,0 +1,943 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+
+#include <assert.h>
+#include <math.h>
+
+#include "src/enc/backward_references_enc.h"
+#include "src/enc/histogram_enc.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/color_cache_utils.h"
+#include "src/utils/utils.h"
+
+#define MIN_BLOCK_SIZE 256  // lower bound applied to block_size in VP8LBackwardRefsInit()
+
+#define MAX_ENTROPY    (1e30f)  // starting value for the entropy minimum search
+
+// 1M window (4M bytes) minus 120 special codes for short distances.
+#define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120)
+
+// Minimum number of pixels for which it is cheaper to encode a
+// distance + length instead of each pixel as a literal.
+#define MIN_LENGTH 4
+
+// -----------------------------------------------------------------------------
+
+static const uint8_t plane_to_code_lut[128] = {  // 16 entries per row; indexed as in VP8LDistanceToPlaneCode()
+ 96,   73,  55,  39,  23,  13,   5,  1,  255, 255, 255, 255, 255, 255, 255, 255,
+ 101,  78,  58,  42,  26,  16,   8,  2,    0,   3,  9,   17,  27,  43,  59,  79,
+ 102,  86,  62,  46,  32,  20,  10,  6,    4,   7,  11,  21,  33,  47,  63,  87,
+ 105,  90,  70,  52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53,  71,  91,
+ 110,  99,  82,  66,  48,  35,  30,  24,  22,  25,  31,  36,  49,  67,  83, 100,
+ 115, 108,  94,  76,  64,  50,  44,  40,  34,  41,  45,  51,  65,  77,  95, 109,
+ 118, 113, 103,  92,  80,  68,  60,  56,  54,  57,  61,  69,  81,  93, 104, 114,
+ 119, 116, 111, 106,  97,  88,  84,  74,  72,  75,  85,  89,  98, 107, 112, 117
+};
+
+extern int VP8LDistanceToPlaneCode(int xsize, int dist);
+// Near offsets get a code from plane_to_code_lut (+1); others dist + 120.
+int VP8LDistanceToPlaneCode(int xsize, int dist) {
+  const int rows = dist / xsize;
+  const int cols = dist - rows * xsize;
+  const int near_left = (cols <= 8 && rows < 8);
+  const int near_right = (cols > xsize - 8 && rows < 7);
+  if (near_left) return 1 + plane_to_code_lut[16 * rows + 8 - cols];
+  if (!near_right) return 120 + dist;
+  return 1 + plane_to_code_lut[16 * (rows + 1) + 8 + (xsize - cols)];
+}
+
+// Compares array1 and array2 and reports where they first differ.
+// Fast path: when the arrays already differ at index 'best_len_match', the
+// match cannot beat the current best, so 0 is returned without scanning.
+// Otherwise the result is whatever VP8LVectorMismatch() reports for the first
+// 'max_limit' elements (presumably the first differing index, or max_limit
+// when no difference is found -- confirm against its definition).
+static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
+                                       const uint32_t* const array2,
+                                       int best_len_match, int max_limit) {
+  const int differs_at_best =
+      (array1[best_len_match] != array2[best_len_match]);
+  return differs_at_best ? 0
+                         : VP8LVectorMismatch(array1, array2, max_limit);
+}
+
+// -----------------------------------------------------------------------------
+//  VP8LBackwardRefs
+
+struct PixOrCopyBlock {  // one chunk in a singly-linked list of PixOrCopy entries
+  PixOrCopyBlock* next_;   // next block in the list (or NULL for the last one)
+  PixOrCopy* start_;       // first entry; set to just past this header on allocation
+  int size_;               // number of entries currently in use
+};
+
+extern void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
+void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) {  // empties 'refs' but keeps its blocks for reuse
+  assert(refs != NULL);
+  if (refs->tail_ != NULL) {
+    *refs->tail_ = refs->free_blocks_;  // recycle all blocks at once: old free list is chained after the used blocks
+  }
+  refs->free_blocks_ = refs->refs_;  // the combined chain becomes the new free list
+  refs->tail_ = &refs->refs_;  // the next appended block becomes the head
+  refs->last_block_ = NULL;
+  refs->refs_ = NULL;
+}
+
+void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
+  PixOrCopyBlock* block;
+  assert(refs != NULL);
+  VP8LClearBackwardRefs(refs);  // every block is on the free list after this
+  for (block = refs->free_blocks_; block != NULL; block = refs->free_blocks_) {
+    refs->free_blocks_ = block->next_;  // unlink first, then release
+    WebPSafeFree(block);
+  }
+}
+
+void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
+  assert(refs != NULL);
+  memset(refs, 0, sizeof(*refs));
+  refs->block_size_ = block_size;
+  if (block_size < MIN_BLOCK_SIZE) refs->block_size_ = MIN_BLOCK_SIZE;  // floor
+  refs->tail_ = &refs->refs_;  // empty list: tail points at the head slot
+}
+
+// Returns a cursor on the first entry of 'refs'; both positions are NULL
+// when 'refs' holds no block.
+VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
+  VP8LRefsCursor cursor;
+  PixOrCopyBlock* const first = refs->refs_;
+
+  cursor.cur_block_ = first;
+  cursor.cur_pos = (first != NULL) ? first->start_ : NULL;
+  // last_pos_ sits one past the last used entry of the block.
+  cursor.last_pos_ = (first != NULL) ? first->start_ + first->size_ : NULL;
+  return cursor;
+}
+
+void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
+  PixOrCopyBlock* const following = c->cur_block_->next_;
+  c->cur_block_ = following;  // NULL once the list is exhausted
+  c->cur_pos = (following != NULL) ? following->start_ : NULL;
+  c->last_pos_ = (following != NULL) ? c->cur_pos + following->size_ : NULL;
+}
+
+// Appends an empty block to 'refs', reusing one from the free list when
+// possible. Returns NULL (and sets refs->error_) if the allocation fails.
+static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
+  PixOrCopyBlock* block = refs->free_blocks_;
+  if (block != NULL) {
+    refs->free_blocks_ = block->next_;  // pop from the free list
+  } else {
+    const size_t payload = refs->block_size_ * sizeof(*block->start_);
+    block = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, sizeof(*block) + payload);
+    if (block == NULL) {
+      refs->error_ |= 1;
+      return NULL;
+    }
+    block->start_ = (PixOrCopy*)((uint8_t*)block + sizeof(*block));  // not always aligned
+  }
+  *refs->tail_ = block;
+  refs->tail_ = &block->next_;
+  refs->last_block_ = block;
+  block->next_ = NULL;
+  block->size_ = 0;
+  return block;
+}
+
+extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
+                                      const PixOrCopy v);
+// Appends 'v', opening a new block if needed; refs->error_ is set on failure.
+void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
+                               const PixOrCopy v) {
+  PixOrCopyBlock* dst = refs->last_block_;
+  const int need_block = (dst == NULL) || (dst->size_ == refs->block_size_);
+  if (need_block) dst = BackwardRefsNewBlock(refs);
+  if (dst == NULL) return;  // allocation failed, nothing is stored
+  dst->start_[dst->size_++] = v;
+}
+
+// -----------------------------------------------------------------------------
+// Hash chains
+
+// Allocates 'size' entries; 'p' must still be empty. Returns 0 on failure.
+int VP8LHashChainInit(VP8LHashChain* const p, int size) {
+  uint32_t* entries;
+  assert(p->size_ == 0);
+  assert(p->offset_length_ == NULL);
+  assert(size > 0);
+  entries = (uint32_t*)WebPSafeMalloc(size, sizeof(*entries));
+  p->offset_length_ = entries;
+  if (entries != NULL) p->size_ = size;
+  return (entries != NULL);
+}
+
+// Frees the chain storage and returns 'p' to its empty state.
+void VP8LHashChainClear(VP8LHashChain* const p) {
+  assert(p != NULL);
+  WebPSafeFree(p->offset_length_);
+  p->offset_length_ = NULL;  // allows a later VP8LHashChainInit()
+  p->size_ = 0;
+}
+
+// -----------------------------------------------------------------------------
+
+#define HASH_MULTIPLIER_HI (0xc6a4a793ULL)
+#define HASH_MULTIPLIER_LO (0x5bd1e996ULL)
+
+// Hashes the pixel pair (argb[0], argb[1]) down to HASH_BITS bits.
+static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) {
+  // Only the low 32 bits of each 64-bit product are kept.
+  const uint32_t hi = (uint32_t)((argb[1] * HASH_MULTIPLIER_HI) & 0xffffffffu);
+  const uint32_t lo = (uint32_t)((argb[0] * HASH_MULTIPLIER_LO) & 0xffffffffu);
+  return (hi + lo) >> (32 - HASH_BITS);
+}
+
+// Max hash chain lookups for a quality: 8 + quality^2 / 128 (86 at 100).
+static int GetMaxItersForQuality(int quality) {
+  const int quality_squared = quality * quality;
+  return quality_squared / 128 + 8;
+}
+
+// Window size grows with quality in four tiers and is capped at WINDOW_SIZE.
+static int GetWindowSizeForHashChain(int quality, int xsize) {
+  int window;
+  assert(xsize > 0);
+  if (quality > 75) return WINDOW_SIZE;
+  window = xsize << ((quality > 50) ? 8 : (quality > 25) ? 6 : 4);
+  return (window < WINDOW_SIZE) ? window : WINDOW_SIZE;
+}
+
+static WEBP_INLINE int MaxFindCopyLength(int len) {  // min(len, MAX_LENGTH)
+  return (len >= MAX_LENGTH) ? MAX_LENGTH : len;
+}
+
+int VP8LHashChainFill(VP8LHashChain* const p, int quality,
+                      const uint32_t* const argb, int xsize, int ysize,
+                      int low_effort) {  // fills p->offset_length_; returns 0 only if the temporary table cannot be allocated
+  const int size = xsize * ysize;  // total number of pixels
+  const int iter_max = GetMaxItersForQuality(quality);
+  const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize);
+  int pos;
+  int argb_comp;
+  uint32_t base_position;
+  int32_t* hash_to_first_index;  // latest position seen for each hash code (-1 if none)
+  // Temporarily use the p->offset_length_ as a hash chain.
+  int32_t* chain = (int32_t*)p->offset_length_;
+  assert(size > 0);
+  assert(p->size_ != 0);
+  assert(p->offset_length_ != NULL);
+
+  if (size <= 2) {  // at most two pixels: both entries are simply zeroed
+    p->offset_length_[0] = p->offset_length_[size - 1] = 0;
+    return 1;
+  }
+
+  hash_to_first_index =
+      (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
+  if (hash_to_first_index == NULL) return 0;
+
+  // Set the int32_t array to -1.
+  memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index));
+  // Fill the chain linking pixels with the same hash.
+  argb_comp = (argb[0] == argb[1]);
+  for (pos = 0; pos < size - 2;) {
+    uint32_t hash_code;
+    const int argb_comp_next = (argb[pos + 1] == argb[pos + 2]);
+    if (argb_comp && argb_comp_next) {
+      // Consecutive pixels with the same color will share the same hash.
+      // We therefore use a different hash: the color and its repetition
+      // length.
+      uint32_t tmp[2];
+      uint32_t len = 1;
+      tmp[0] = argb[pos];
+      // Figure out how far the pixels are the same.
+      // The last pixel has a different 64 bit hash, as its next pixel does
+      // not have the same color, so we just need to get to the last pixel equal
+      // to its follower.
+      while (pos + (int)len + 2 < size && argb[pos + len + 2] == argb[pos]) {
+        ++len;
+      }
+      if (len > MAX_LENGTH) {
+        // Skip the pixels that match for distance=1 and length>MAX_LENGTH
+        // because they are linked to their predecessor and we automatically
+        // check that in the main for loop below. Skipping means setting no
+        // predecessor in the chain, hence -1.
+        memset(chain + pos, 0xff, (len - MAX_LENGTH) * sizeof(*chain));
+        pos += len - MAX_LENGTH;
+        len = MAX_LENGTH;
+      }
+      // Process the rest of the hash chain.
+      while (len) {
+        tmp[1] = len--;  // hash the pair (color, remaining run length)
+        hash_code = GetPixPairHash64(tmp);
+        chain[pos] = hash_to_first_index[hash_code];
+        hash_to_first_index[hash_code] = pos++;
+      }
+      argb_comp = 0;
+    } else {
+      // Just move one pixel forward.
+      hash_code = GetPixPairHash64(argb + pos);
+      chain[pos] = hash_to_first_index[hash_code];
+      hash_to_first_index[hash_code] = pos++;
+      argb_comp = argb_comp_next;
+    }
+  }
+  // Process the penultimate pixel.
+  chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)];
+
+  WebPSafeFree(hash_to_first_index);
+
+  // Find the best match interval at each pixel, defined by an offset to the
+  // pixel and a length. The right-most pixel cannot match anything to the right
+  // (hence a best length of 0) and the left-most pixel nothing to the left
+  // (hence an offset of 0).
+  assert(size > 2);
+  p->offset_length_[0] = p->offset_length_[size - 1] = 0;
+  for (base_position = size - 2; base_position > 0;) {  // decremented inside the while (1) loop below
+    const int max_len = MaxFindCopyLength(size - 1 - base_position);
+    const uint32_t* const argb_start = argb + base_position;
+    int iter = iter_max;
+    int best_length = 0;
+    uint32_t best_distance = 0;
+    uint32_t best_argb;
+    const int min_pos =
+        (base_position > window_size) ? base_position - window_size : 0;
+    const int length_max = (max_len < 256) ? max_len : 256;
+    uint32_t max_base_position;
+
+    pos = chain[base_position];
+    if (!low_effort) {
+      int curr_length;
+      // Heuristic: use the comparison with the above line as an initialization.
+      if (base_position >= (uint32_t)xsize) {
+        curr_length = FindMatchLength(argb_start - xsize, argb_start,
+                                      best_length, max_len);
+        if (curr_length > best_length) {
+          best_length = curr_length;
+          best_distance = xsize;
+        }
+        --iter;
+      }
+      // Heuristic: compare to the previous pixel.
+      curr_length =
+          FindMatchLength(argb_start - 1, argb_start, best_length, max_len);
+      if (curr_length > best_length) {
+        best_length = curr_length;
+        best_distance = 1;
+      }
+      --iter;
+      // Skip the for loop if we already have the maximum.
+      if (best_length == MAX_LENGTH) pos = min_pos - 1;
+    }
+    best_argb = argb_start[best_length];
+
+    for (; pos >= min_pos && --iter; pos = chain[pos]) {
+      int curr_length;
+      assert(base_position > (uint32_t)pos);
+
+      if (argb[pos + best_length] != best_argb) continue;  // cannot be longer than the current best
+
+      curr_length = VP8LVectorMismatch(argb + pos, argb_start, max_len);
+      if (best_length < curr_length) {
+        best_length = curr_length;
+        best_distance = base_position - pos;
+        best_argb = argb_start[best_length];
+        // Stop if we have reached a good enough length.
+        if (best_length >= length_max) break;
+      }
+    }
+    // We have the best match but in case the two intervals continue matching
+    // to the left, we have the best matches for the left-extended pixels.
+    max_base_position = base_position;
+    while (1) {
+      assert(best_length <= MAX_LENGTH);
+      assert(best_distance <= WINDOW_SIZE);
+      p->offset_length_[base_position] =
+          (best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length;
+      --base_position;
+      // Stop if we don't have a match or if we are out of bounds.
+      if (best_distance == 0 || base_position == 0) break;
+      // Stop if we cannot extend the matching intervals to the left.
+      if (base_position < best_distance ||
+          argb[base_position - best_distance] != argb[base_position]) {
+        break;
+      }
+      // Stop if we are matching at its limit because there could be a closer
+      // matching interval with the same maximum length. Then again, if the
+      // matching interval is as close as possible (best_distance == 1), we will
+      // never find anything better so let's continue.
+      if (best_length == MAX_LENGTH && best_distance != 1 &&
+          base_position + MAX_LENGTH < max_base_position) {
+        break;
+      }
+      if (best_length < MAX_LENGTH) {
+        ++best_length;
+        max_base_position = base_position;
+      }
+    }
+  }
+  return 1;
+}
+
+// Emits 'pixel' as a cache index when the color cache already holds it, and
+// as a literal otherwise (storing it in the cache when one is in use).
+static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
+                                         VP8LColorCache* const hashers,
+                                         VP8LBackwardRefs* const refs) {
+  if (!use_color_cache) {
+    VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(pixel));
+  } else {
+    const uint32_t slot = VP8LColorCacheGetIndex(hashers, pixel);
+    const int is_cached = (VP8LColorCacheLookup(hashers, slot) == pixel);
+    // A miss stores the pixel so that later occurrences can hit.
+    if (!is_cached) VP8LColorCacheSet(hashers, slot, pixel);
+    VP8LBackwardRefsCursorAdd(refs, is_cached
+                                        ? PixOrCopyCreateCacheIdx(slot)
+                                        : PixOrCopyCreateLiteral(pixel));
+  }
+}
+
+// Builds 'refs' from two candidates per position: a run of the previous pixel
+// (distance 1) or a copy from the row above (distance xsize); anything shorter
+// than MIN_LENGTH is emitted as a single literal. Returns 0 on failure.
+static int BackwardReferencesRle(int xsize, int ysize,
+                                 const uint32_t* const argb,
+                                 int cache_bits, VP8LBackwardRefs* const refs) {
+  const int num_pixels = xsize * ysize;
+  const int use_color_cache = (cache_bits > 0);
+  VP8LColorCache hashers;
+  int pos;
+  if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) {
+    return 0;
+  }
+  VP8LClearBackwardRefs(refs);
+  // The first pixel has nothing before it: always a literal.
+  AddSingleLiteral(argb[0], use_color_cache, &hashers, refs);
+  for (pos = 1; pos < num_pixels;) {
+    const uint32_t* const cur = argb + pos;
+    const int limit = MaxFindCopyLength(num_pixels - pos);
+    const int run_len = FindMatchLength(cur, cur - 1, 0, limit);
+    const int above_len =
+        (pos >= xsize) ? FindMatchLength(cur, cur - xsize, 0, limit) : 0;
+    if (run_len >= MIN_LENGTH && run_len >= above_len) {
+      // Repeating one pixel does not change the color cache state, so the
+      // cache needs no update here.
+      VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, run_len));
+      pos += run_len;
+    } else if (above_len < MIN_LENGTH) {
+      AddSingleLiteral(argb[pos], use_color_cache, &hashers, refs);
+      ++pos;
+    } else {
+      int k;
+      VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, above_len));
+      for (k = 0; use_color_cache && k < above_len; ++k) {
+        VP8LColorCacheInsert(&hashers, argb[pos + k]);
+      }
+      pos += above_len;
+    }
+  }
+  if (use_color_cache) VP8LColorCacheClear(&hashers);
+  return !refs->error_;
+}
+
+static int BackwardReferencesLz77(int xsize, int ysize,
+                                  const uint32_t* const argb, int cache_bits,
+                                  const VP8LHashChain* const hash_chain,
+                                  VP8LBackwardRefs* const refs) {  // returns 1 on success, 0 on failure
+  int i;
+  int i_last_check = -1;  // the look-ahead scan below starts at i_last_check + 1
+  int ok = 0;
+  int cc_init = 0;  // non-zero once the color cache needs clearing
+  const int use_color_cache = (cache_bits > 0);
+  const int pix_count = xsize * ysize;
+  VP8LColorCache hashers;
+
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+  VP8LClearBackwardRefs(refs);
+  for (i = 0; i < pix_count;) {
+    // Alternative#1: Code the pixels starting at 'i' using backward reference.
+    int offset = 0;
+    int len = 0;
+    int j;
+    VP8LHashChainFindCopy(hash_chain, i, &offset, &len);
+    if (len >= MIN_LENGTH) {
+      const int len_ini = len;
+      int max_reach = 0;
+      const int j_max =
+          (i + len_ini >= pix_count) ? pix_count - 1 : i + len_ini;
+      // Only start from what we have not checked already.
+      i_last_check = (i > i_last_check) ? i : i_last_check;
+      // We know the best match for the current pixel but we try to find the
+      // best matches for the current pixel AND the next one combined.
+      // The naive method would use the intervals:
+      // [i,i+len) + [i+len, length of best match at i+len)
+      // while we check if we can use:
+      // [i,j) (where j<=i+len) + [j, length of best match at j)
+      for (j = i_last_check + 1; j <= j_max; ++j) {
+        const int len_j = VP8LHashChainFindLength(hash_chain, j);
+        const int reach =
+            j + (len_j >= MIN_LENGTH ? len_j : 1);  // 1 for single literal.
+        if (reach > max_reach) {
+          len = j - i;  // cut the current copy at j
+          max_reach = reach;
+          if (max_reach >= pix_count) break;
+        }
+      }
+    } else {
+      len = 1;  // too short for a copy: emit one literal
+    }
+    // Go with literal or backward reference.
+    assert(len > 0);
+    if (len == 1) {
+      AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+    } else {
+      VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
+      if (use_color_cache) {
+        for (j = i; j < i + len; ++j) VP8LColorCacheInsert(&hashers, argb[j]);
+      }
+    }
+    i += len;
+  }
+
+  ok = !refs->error_;
+ Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  return ok;
+}
+
+// Compute an LZ77 by forcing matches to happen within a given distance cost.
+// We therefore limit the algorithm to the lowest 32 values in the PlaneCode
+// definition.
+#define WINDOW_OFFSETS_SIZE_MAX 32
+static int BackwardReferencesLz77Box(int xsize, int ysize,
+                                     const uint32_t* const argb, int cache_bits,
+                                     const VP8LHashChain* const hash_chain_best,
+                                     VP8LHashChain* hash_chain,
+                                     VP8LBackwardRefs* const refs) {  // fills 'hash_chain', then runs BackwardReferencesLz77() on it
+  int i;
+  const int pix_count = xsize * ysize;
+  uint16_t* counts;
+  int window_offsets[WINDOW_OFFSETS_SIZE_MAX] = {0};
+  int window_offsets_new[WINDOW_OFFSETS_SIZE_MAX] = {0};
+  int window_offsets_size = 0;
+  int window_offsets_new_size = 0;
+  uint16_t* const counts_ini =
+      (uint16_t*)WebPSafeMalloc(xsize * ysize, sizeof(*counts_ini));
+  int best_offset_prev = -1, best_length_prev = -1;
+  if (counts_ini == NULL) return 0;
+
+  // counts[i] counts how many times a pixel is repeated starting at position i.
+  i = pix_count - 2;
+  counts = counts_ini + i;
+  counts[1] = 1;  // the last pixel is a run of one
+  for (; i >= 0; --i, --counts) {
+    if (argb[i] == argb[i + 1]) {
+      // Max out the counts to MAX_LENGTH.
+      counts[0] = counts[1] + (counts[1] != MAX_LENGTH);
+    } else {
+      counts[0] = 1;
+    }
+  }
+
+  // Figure out the window offsets around a pixel. They are stored in a
+  // spiraling order around the pixel as defined by VP8LDistanceToPlaneCode.
+  {
+    int x, y;
+    for (y = 0; y <= 6; ++y) {
+      for (x = -6; x <= 6; ++x) {
+        const int offset = y * xsize + x;
+        int plane_code;
+        // Ignore offsets that do not point strictly before the pixel.
+        if (offset <= 0) continue;
+        plane_code = VP8LDistanceToPlaneCode(xsize, offset) - 1;
+        if (plane_code >= WINDOW_OFFSETS_SIZE_MAX) continue;
+        window_offsets[plane_code] = offset;
+      }
+    }
+    // For narrow images, not all plane codes are reached, so remove those.
+    for (i = 0; i < WINDOW_OFFSETS_SIZE_MAX; ++i) {
+      if (window_offsets[i] == 0) continue;
+      window_offsets[window_offsets_size++] = window_offsets[i];
+    }
+    // Given a pixel P, find the offsets that reach pixels unreachable from P-1
+    // with any of the offsets in window_offsets[].
+    for (i = 0; i < window_offsets_size; ++i) {
+      int j;
+      int is_reachable = 0;
+      for (j = 0; j < window_offsets_size && !is_reachable; ++j) {
+        is_reachable |= (window_offsets[i] == window_offsets[j] + 1);
+      }
+      if (!is_reachable) {
+        window_offsets_new[window_offsets_new_size] = window_offsets[i];
+        ++window_offsets_new_size;
+      }
+    }
+  }
+
+  hash_chain->offset_length_[0] = 0;
+  for (i = 1; i < pix_count; ++i) {
+    int ind;
+    int best_length = VP8LHashChainFindLength(hash_chain_best, i);
+    int best_offset;
+    int do_compute = 1;
+
+    if (best_length >= MAX_LENGTH) {
+      // Do not recompute the best match if we already have a maximal one in the
+      // window.
+      best_offset = VP8LHashChainFindOffset(hash_chain_best, i);
+      for (ind = 0; ind < window_offsets_size; ++ind) {
+        if (best_offset == window_offsets[ind]) {
+          do_compute = 0;
+          break;
+        }
+      }
+    }
+    if (do_compute) {
+      // Figure out if we should use the offset/length from the previous pixel
+      // as an initial guess and therefore only inspect the offsets in
+      // window_offsets_new[].
+      const int use_prev =
+          (best_length_prev > 1) && (best_length_prev < MAX_LENGTH);
+      const int num_ind =
+          use_prev ? window_offsets_new_size : window_offsets_size;
+      best_length = use_prev ? best_length_prev - 1 : 0;
+      best_offset = use_prev ? best_offset_prev : 0;
+      // Find the longest match in a window around the pixel.
+      for (ind = 0; ind < num_ind; ++ind) {
+        int curr_length = 0;
+        int j = i;
+        int j_offset =
+            use_prev ? i - window_offsets_new[ind] : i - window_offsets[ind];
+        if (j_offset < 0 || argb[j_offset] != argb[i]) continue;
+        // The longest match is the sum of how many times each pixel is
+        // repeated.
+        do {
+          const int counts_j_offset = counts_ini[j_offset];
+          const int counts_j = counts_ini[j];
+          if (counts_j_offset != counts_j) {
+            curr_length +=
+                (counts_j_offset < counts_j) ? counts_j_offset : counts_j;
+            break;
+          }
+          // The same color is repeated counts_j_offset times at j_offset and j.
+          curr_length += counts_j_offset;
+          j_offset += counts_j_offset;
+          j += counts_j_offset;
+        } while (curr_length <= MAX_LENGTH && j < pix_count &&
+                 argb[j_offset] == argb[j]);
+        if (best_length < curr_length) {
+          best_offset =
+              use_prev ? window_offsets_new[ind] : window_offsets[ind];
+          if (curr_length >= MAX_LENGTH) {
+            best_length = MAX_LENGTH;
+            break;
+          } else {
+            best_length = curr_length;
+          }
+        }
+      }
+    }
+
+    assert(i + best_length <= pix_count);
+    assert(best_length <= MAX_LENGTH);
+    if (best_length <= MIN_LENGTH) {  // matches of MIN_LENGTH pixels or fewer are dropped
+      hash_chain->offset_length_[i] = 0;
+      best_offset_prev = 0;
+      best_length_prev = 0;
+    } else {
+      hash_chain->offset_length_[i] =
+          (best_offset << MAX_LENGTH_BITS) | (uint32_t)best_length;
+      best_offset_prev = best_offset;
+      best_length_prev = best_length;
+    }
+  }
+  hash_chain->offset_length_[0] = 0;
+  WebPSafeFree(counts_ini);
+
+  return BackwardReferencesLz77(xsize, ysize, argb, cache_bits, hash_chain,
+                                refs);
+}
+
+// -----------------------------------------------------------------------------
+
+// Replaces each copy distance in 'refs' by its VP8LDistanceToPlaneCode() code.
+static void BackwardReferences2DLocality(int xsize,
+                                         const VP8LBackwardRefs* const refs) {
+  VP8LRefsCursor cursor;
+  for (cursor = VP8LRefsCursorInit(refs); VP8LRefsCursorOk(&cursor);
+       VP8LRefsCursorNext(&cursor)) {
+    PixOrCopy* const entry = cursor.cur_pos;
+    if (!PixOrCopyIsCopy(entry)) continue;  // only copies carry a distance
+    entry->argb_or_distance =
+        VP8LDistanceToPlaneCode(xsize, (int)entry->argb_or_distance);
+  }
+}
+
+// Evaluate optimal cache bits for the local color cache.
+// The input *best_cache_bits sets the maximum cache bits to use (passing 0
+// implies disabling the local color cache). The local color cache is also
+// disabled for the lower (<= 25) quality.
+// Returns 0 in case of memory error.
+static int CalculateBestCacheSize(const uint32_t* argb, int quality,
+                                  const VP8LBackwardRefs* const refs,
+                                  int* const best_cache_bits) {
+  int i;
+  const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
+  double entropy_min = MAX_ENTROPY;
+  int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };  // which hashers[] entries need clearing
+  VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  VP8LHistogram* histos[MAX_COLOR_CACHE_BITS + 1] = { NULL };
+  int ok = 0;
+
+  assert(cache_bits_max >= 0 && cache_bits_max <= MAX_COLOR_CACHE_BITS);
+
+  if (cache_bits_max == 0) {
+    *best_cache_bits = 0;
+    // Local color cache is disabled.
+    return 1;
+  }
+
+  // Allocate data.
+  for (i = 0; i <= cache_bits_max; ++i) {
+    histos[i] = VP8LAllocateHistogram(i);
+    if (histos[i] == NULL) goto Error;
+    if (i == 0) continue;  // index 0 means "no color cache"
+    cc_init[i] = VP8LColorCacheInit(&hashers[i], i);
+    if (!cc_init[i]) goto Error;
+  }
+
+  // Find the cache_bits giving the lowest entropy. The search is done in a
+  // brute-force way as the function (entropy w.r.t cache_bits) can be
+  // anything in practice.
+  while (VP8LRefsCursorOk(&c)) {
+    const PixOrCopy* const v = c.cur_pos;
+    if (PixOrCopyIsLiteral(v)) {
+      const uint32_t pix = *argb++;  // a literal consumes exactly one pixel
+      const uint32_t a = (pix >> 24) & 0xff;
+      const uint32_t r = (pix >> 16) & 0xff;
+      const uint32_t g = (pix >>  8) & 0xff;
+      const uint32_t b = (pix >>  0) & 0xff;
+      // The keys of the caches can be derived from the longest one.
+      int key = VP8LHashPix(pix, 32 - cache_bits_max);
+      // Do not use the color cache for cache_bits = 0.
+      ++histos[0]->blue_[b];
+      ++histos[0]->literal_[g];
+      ++histos[0]->red_[r];
+      ++histos[0]->alpha_[a];
+      // Deal with cache_bits > 0.
+      for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
+        if (VP8LColorCacheLookup(&hashers[i], key) == pix) {
+          ++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
+        } else {
+          VP8LColorCacheSet(&hashers[i], key, pix);
+          ++histos[i]->blue_[b];
+          ++histos[i]->literal_[g];
+          ++histos[i]->red_[r];
+          ++histos[i]->alpha_[a];
+        }
+      }
+    } else {
+      // We should compute the contribution of the (distance,length)
+      // histograms but those are the same independently from the cache size.
+      // As those constant contributions are in the end added to the other
+      // histogram contributions, we can safely ignore them.
+      int len = PixOrCopyLength(v);
+      uint32_t argb_prev = *argb ^ 0xffffffffu;  // differs from *argb, so the first pixel is always inserted
+      // Update the color caches.
+      do {
+        if (*argb != argb_prev) {
+          // Efficiency: insert only if the color changes.
+          int key = VP8LHashPix(*argb, 32 - cache_bits_max);
+          for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
+            hashers[i].colors_[key] = *argb;
+          }
+          argb_prev = *argb;
+        }
+        argb++;
+      } while (--len != 0);
+    }
+    VP8LRefsCursorNext(&c);
+  }
+
+  for (i = 0; i <= cache_bits_max; ++i) {
+    const double entropy = VP8LHistogramEstimateBits(histos[i]);
+    if (i == 0 || entropy < entropy_min) {  // strict '<': ties keep the smaller cache
+      entropy_min = entropy;
+      *best_cache_bits = i;
+    }
+  }
+  ok = 1;
+Error:
+  for (i = 0; i <= cache_bits_max; ++i) {
+    if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
+    VP8LFreeHistogram(histos[i]);
+  }
+  return ok;
+}
+
+// Rewrites 'refs' in place for a color cache of 'cache_bits' bits: literals
+// already present in the cache become cache indexes. Returns 0 on failure.
+static int BackwardRefsWithLocalCache(const uint32_t* const argb,
+                                      int cache_bits,
+                                      VP8LBackwardRefs* const refs) {
+  int next_pixel = 0;  // index in 'argb' of the pixel the entry starts at
+  VP8LColorCache hashers;
+  VP8LRefsCursor cursor = VP8LRefsCursorInit(refs);
+  if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0;
+
+  for (; VP8LRefsCursorOk(&cursor); VP8LRefsCursorNext(&cursor)) {
+    PixOrCopy* const entry = cursor.cur_pos;
+    if (PixOrCopyIsLiteral(entry)) {
+      const uint32_t color = entry->argb_or_distance;
+      const int cache_slot = VP8LColorCacheContains(&hashers, color);
+      if (cache_slot < 0) {
+        VP8LColorCacheInsert(&hashers, color);
+      } else {
+        *entry = PixOrCopyCreateCacheIdx(cache_slot);
+      }
+      ++next_pixel;
+    } else {
+      // 'refs' was built without a local cache, so a non-literal entry can
+      // only be a copy: feed every pixel it covers to the cache.
+      int remaining;
+      assert(PixOrCopyIsCopy(entry));
+      for (remaining = entry->len; remaining > 0; --remaining) {
+        VP8LColorCacheInsert(&hashers, argb[next_pixel++]);
+      }
+    }
+  }
+  VP8LColorCacheClear(&hashers);
+  return 1;
+}
+
+// Low-effort path: plain LZ77 with the color cache disabled. NULL on failure.
+static VP8LBackwardRefs* GetBackwardReferencesLowEffort(
+    int width, int height, const uint32_t* const argb,
+    int* const cache_bits, const VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs* const refs_lz77) {
+  const int ok =
+      BackwardReferencesLz77(width, height, argb, 0, hash_chain, refs_lz77);
+  *cache_bits = 0;
+  if (ok) BackwardReferences2DLocality(width, refs_lz77);
+  return ok ? refs_lz77 : NULL;
+}
+
+extern int VP8LBackwardReferencesTraceBackwards(
+    int xsize, int ysize, const uint32_t* const argb, int cache_bits,
+    const VP8LHashChain* const hash_chain,
+    const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst);
+// Tries each LZ77 variant selected in 'lz77_types_to_try', keeps the cheapest
+// result and returns it ('best' or 'worst'); returns NULL if any step fails.
+static VP8LBackwardRefs* GetBackwardReferences(
+    int width, int height, const uint32_t* const argb, int quality,
+    int lz77_types_to_try, int* const cache_bits,
+    const VP8LHashChain* const hash_chain, VP8LBackwardRefs* best,
+    VP8LBackwardRefs* worst) {
+  const int cache_bits_initial = *cache_bits;
+  double bit_cost_best = -1;
+  VP8LHistogram* histo = VP8LAllocateHistogram(MAX_COLOR_CACHE_BITS);
+  int lz77_type, lz77_type_best = 0;
+  int ok = 0;  // only set once every step has succeeded
+  VP8LHashChain hash_chain_box;
+  memset(&hash_chain_box, 0, sizeof(hash_chain_box));
+  if (histo == NULL) goto Error;
+
+  for (lz77_type = 1; lz77_types_to_try;
+       lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
+    int res = 0;
+    double bit_cost;
+    int cache_bits_tmp = cache_bits_initial;
+    if ((lz77_types_to_try & lz77_type) == 0) continue;
+    switch (lz77_type) {
+      case kLZ77RLE:
+        res = BackwardReferencesRle(width, height, argb, 0, worst);
+        break;
+      case kLZ77Standard:
+        // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color
+        // cache is not that different in practice.
+        res = BackwardReferencesLz77(width, height, argb, 0, hash_chain, worst);
+        break;
+      case kLZ77Box:
+        if (!VP8LHashChainInit(&hash_chain_box, width * height)) goto Error;
+        res = BackwardReferencesLz77Box(width, height, argb, 0, hash_chain,
+                                        &hash_chain_box, worst);
+        break;
+      default:
+        assert(0);
+    }
+    if (!res) goto Error;
+
+    // Next, try with a color cache and update the references.
+    if (!CalculateBestCacheSize(argb, quality, worst, &cache_bits_tmp)) {
+      goto Error;
+    }
+    if (cache_bits_tmp > 0) {
+      if (!BackwardRefsWithLocalCache(argb, cache_bits_tmp, worst)) {
+        goto Error;
+      }
+    }
+
+    // Keep the best backward references.
+    VP8LHistogramCreate(histo, worst, cache_bits_tmp);
+    bit_cost = VP8LHistogramEstimateBits(histo);
+    if (lz77_type_best == 0 || bit_cost < bit_cost_best) {
+      VP8LBackwardRefs* const tmp = worst;
+      worst = best;
+      best = tmp;
+      bit_cost_best = bit_cost;
+      *cache_bits = cache_bits_tmp;
+      lz77_type_best = lz77_type;
+    }
+  }
+  assert(lz77_type_best > 0);
+
+  // TraceBackwards is costly: only refine LZ77 results when quality >= 25.
+  if ((lz77_type_best == kLZ77Standard || lz77_type_best == kLZ77Box) &&
+      quality >= 25) {
+    const VP8LHashChain* const hash_chain_tmp =
+        (lz77_type_best == kLZ77Standard) ? hash_chain : &hash_chain_box;
+    if (VP8LBackwardReferencesTraceBackwards(width, height, argb, *cache_bits,
+                                             hash_chain_tmp, best, worst)) {
+      double bit_cost_trace;
+      VP8LHistogramCreate(histo, worst, *cache_bits);
+      bit_cost_trace = VP8LHistogramEstimateBits(histo);
+      if (bit_cost_trace < bit_cost_best) best = worst;
+    }
+  }
+  BackwardReferences2DLocality(width, best);
+  ok = 1;
+
+Error:
+  VP8LHashChainClear(&hash_chain_box);
+  VP8LFreeHistogram(histo);
+  return ok ? best : NULL;  // never hand back references after a failure
+}
+
+// Entry point: full search unless 'low_effort' is set ('refs_tmp2' unused then).
+VP8LBackwardRefs* VP8LGetBackwardReferences(
+    int width, int height, const uint32_t* const argb, int quality,
+    int low_effort, int lz77_types_to_try, int* const cache_bits,
+    const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1,
+    VP8LBackwardRefs* const refs_tmp2) {
+  if (!low_effort) {
+    return GetBackwardReferences(width, height, argb, quality,
+                                 lz77_types_to_try, cache_bits, hash_chain,
+                                 refs_tmp1, refs_tmp2);
+  }
+  return GetBackwardReferencesLowEffort(width, height, argb, cache_bits,
+                                        hash_chain, refs_tmp1);
+}
diff --git a/thirdparty/libwebp/enc/backward_references_enc.h b/thirdparty/libwebp/src/enc/backward_references_enc.h
index 3a19aa763e..103ddfdcb7 100644
--- a/thirdparty/libwebp/enc/backward_references_enc.h
+++ b/thirdparty/libwebp/src/enc/backward_references_enc.h
@@ -10,13 +10,13 @@
 // Author: Jyrki Alakuijala (jyrki@google.com)
 //
 
-#ifndef WEBP_ENC_BACKWARD_REFERENCES_H_
-#define WEBP_ENC_BACKWARD_REFERENCES_H_
+#ifndef WEBP_ENC_BACKWARD_REFERENCES_ENC_H_
+#define WEBP_ENC_BACKWARD_REFERENCES_ENC_H_
 
 #include <assert.h>
 #include <stdlib.h>
-#include "../webp/types.h"
-#include "../webp/format_constants.h"
+#include "src/webp/types.h"
+#include "src/webp/format_constants.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -91,11 +91,6 @@ static WEBP_INLINE uint32_t PixOrCopyLength(const PixOrCopy* const p) {
   return p->len;
 }
 
-static WEBP_INLINE uint32_t PixOrCopyArgb(const PixOrCopy* const p) {
-  assert(p->mode == kLiteral);
-  return p->argb_or_distance;
-}
-
 static WEBP_INLINE uint32_t PixOrCopyCacheIdx(const PixOrCopy* const p) {
   assert(p->mode == kCacheIdx);
   assert(p->argb_or_distance < (1U << MAX_COLOR_CACHE_BITS));
@@ -113,6 +108,16 @@ static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) {
 #define HASH_BITS 18
 #define HASH_SIZE (1 << HASH_BITS)
 
+// If you change this, you need MAX_LENGTH_BITS + WINDOW_SIZE_BITS <= 32 as it
+// is used in VP8LHashChain.
+#define MAX_LENGTH_BITS 12
+#define WINDOW_SIZE_BITS 20
+// We want the max value to be attainable and stored in MAX_LENGTH_BITS bits.
+#define MAX_LENGTH ((1 << MAX_LENGTH_BITS) - 1)
+#if MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32
+#error "MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32"
+#endif
+
 typedef struct VP8LHashChain VP8LHashChain;
 struct VP8LHashChain {
   // The 20 most significant bits contain the offset at which the best match
@@ -134,6 +139,24 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
                       int low_effort);
 void VP8LHashChainClear(VP8LHashChain* const p);  // release memory
 
+static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p,
+                                               const int base_position) {
+  return p->offset_length_[base_position] >> MAX_LENGTH_BITS;
+}
+
+static WEBP_INLINE int VP8LHashChainFindLength(const VP8LHashChain* const p,
+                                               const int base_position) {
+  return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1);
+}
+
+static WEBP_INLINE void VP8LHashChainFindCopy(const VP8LHashChain* const p,
+                                              int base_position,
+                                              int* const offset_ptr,
+                                              int* const length_ptr) {
+  *offset_ptr = VP8LHashChainFindOffset(p, base_position);
+  *length_ptr = VP8LHashChainFindLength(p, base_position);
+}
+
 // -----------------------------------------------------------------------------
 // VP8LBackwardRefs (block-based backward-references storage)
 
@@ -158,9 +181,6 @@ struct VP8LBackwardRefs {
 void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size);
 // Release memory for backward references.
 void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs);
-// Copies the 'src' backward refs to the 'dst'. Returns 0 in case of error.
-int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
-                         VP8LBackwardRefs* const dst);
 
 // Cursor for iterating on references content
 typedef struct {
@@ -189,6 +209,12 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
 // -----------------------------------------------------------------------------
 // Main entry points
 
+enum VP8LLZ77Type {
+  kLZ77Standard = 1,
+  kLZ77RLE = 2,
+  kLZ77Box = 4
+};
+
 // Evaluates best possible backward references for specified quality.
 // The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache
 // bits to use (passing 0 implies disabling the local color cache).
@@ -197,11 +223,12 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
 // refs[0] or refs[1].
 VP8LBackwardRefs* VP8LGetBackwardReferences(
     int width, int height, const uint32_t* const argb, int quality,
-    int low_effort, int* const cache_bits,
-    const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs[2]);
+    int low_effort, int lz77_types_to_try, int* const cache_bits,
+    const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1,
+    VP8LBackwardRefs* const refs_tmp2);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // WEBP_ENC_BACKWARD_REFERENCES_H_
+#endif  // WEBP_ENC_BACKWARD_REFERENCES_ENC_H_
diff --git a/thirdparty/libwebp/enc/config_enc.c b/thirdparty/libwebp/src/enc/config_enc.c
index 4589dc0619..9d4828978e 100644
--- a/thirdparty/libwebp/enc/config_enc.c
+++ b/thirdparty/libwebp/src/enc/config_enc.c
@@ -12,10 +12,10 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
-#include "../webp/encode.h"
+#include "src/webp/encode.h"
 
 //------------------------------------------------------------------------------
 // WebPConfig
diff --git a/thirdparty/libwebp/enc/cost_enc.c b/thirdparty/libwebp/src/enc/cost_enc.c
index c823f5a664..48fd9bc347 100644
--- a/thirdparty/libwebp/enc/cost_enc.c
+++ b/thirdparty/libwebp/src/enc/cost_enc.c
@@ -11,7 +11,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./cost_enc.h"
+#include "src/enc/cost_enc.h"
 
 //------------------------------------------------------------------------------
 // Level cost tables
diff --git a/thirdparty/libwebp/enc/cost_enc.h b/thirdparty/libwebp/src/enc/cost_enc.h
index 99e4b37aa3..bdce1e6a3b 100644
--- a/thirdparty/libwebp/enc/cost_enc.h
+++ b/thirdparty/libwebp/src/enc/cost_enc.h
@@ -11,12 +11,12 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_ENC_COST_H_
-#define WEBP_ENC_COST_H_
+#ifndef WEBP_ENC_COST_ENC_H_
+#define WEBP_ENC_COST_ENC_H_
 
 #include <assert.h>
 #include <stdlib.h>
-#include "./vp8i_enc.h"
+#include "src/enc/vp8i_enc.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -79,4 +79,4 @@ extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES];
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_ENC_COST_H_ */
+#endif  /* WEBP_ENC_COST_ENC_H_ */
diff --git a/thirdparty/libwebp/enc/delta_palettization_enc.c b/thirdparty/libwebp/src/enc/delta_palettization_enc.c
index eaf0f050ea..a61c8e6c93 100644
--- a/thirdparty/libwebp/enc/delta_palettization_enc.c
+++ b/thirdparty/libwebp/src/enc/delta_palettization_enc.c
@@ -10,11 +10,11 @@
 // Author: Mislav Bradac (mislavm@google.com)
 //
 
-#include "./delta_palettization_enc.h"
+#include "src/enc/delta_palettization_enc.h"
 
 #ifdef WEBP_EXPERIMENTAL_FEATURES
-#include "../webp/types.h"
-#include "../dsp/lossless.h"
+#include "src/webp/types.h"
+#include "src/dsp/lossless.h"
 
 #define MK_COL(r, g, b) (((r) << 16) + ((g) << 8) + (b))
 
diff --git a/thirdparty/libwebp/enc/delta_palettization_enc.h b/thirdparty/libwebp/src/enc/delta_palettization_enc.h
index 63048ec6e8..b15e2cd487 100644
--- a/thirdparty/libwebp/enc/delta_palettization_enc.h
+++ b/thirdparty/libwebp/src/enc/delta_palettization_enc.h
@@ -10,11 +10,11 @@
 // Author: Mislav Bradac (mislavm@google.com)
 //
 
-#ifndef WEBP_ENC_DELTA_PALETTIZATION_H_
-#define WEBP_ENC_DELTA_PALETTIZATION_H_
+#ifndef WEBP_ENC_DELTA_PALETTIZATION_ENC_H_
+#define WEBP_ENC_DELTA_PALETTIZATION_ENC_H_
 
-#include "../webp/encode.h"
-#include "../enc/vp8li_enc.h"
+#include "src/webp/encode.h"
+#include "src/enc/vp8li_enc.h"
 
 // Replaces enc->argb_[] input by a palettizable approximation of it,
 // and generates optimal enc->palette_[].
@@ -22,4 +22,4 @@
 // if delta-palettization is not producing expected saving.
 WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc);
 
-#endif  // WEBP_ENC_DELTA_PALETTIZATION_H_
+#endif  // WEBP_ENC_DELTA_PALETTIZATION_ENC_H_
diff --git a/thirdparty/libwebp/enc/filter_enc.c b/thirdparty/libwebp/src/enc/filter_enc.c
index 4bc367274c..580800bfb8 100644
--- a/thirdparty/libwebp/enc/filter_enc.c
+++ b/thirdparty/libwebp/src/enc/filter_enc.c
@@ -12,8 +12,8 @@
 // Author: somnath@google.com (Somnath Banerjee)
 
 #include <assert.h>
-#include "./vp8i_enc.h"
-#include "../dsp/dsp.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/dsp/dsp.h"
 
 // This table gives, for a given sharpness, the filtering strength to be
 // used (at least) in order to filter a given edge step delta.
@@ -65,6 +65,8 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta) {
 //------------------------------------------------------------------------------
 // Paragraph 15.4: compute the inner-edge filtering strength
 
+#if !defined(WEBP_REDUCE_SIZE)
+
 static int GetILevel(int sharpness, int level) {
   if (sharpness > 0) {
     if (sharpness > 4) {
@@ -129,11 +131,14 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
   return sum;
 }
 
+#endif  // !defined(WEBP_REDUCE_SIZE)
+
 //------------------------------------------------------------------------------
 // Exposed APIs: Encoder should call the following 3 functions to adjust
 // loop filter strength
 
 void VP8InitFilter(VP8EncIterator* const it) {
+#if !defined(WEBP_REDUCE_SIZE)
   if (it->lf_stats_ != NULL) {
     int s, i;
     for (s = 0; s < NUM_MB_SEGMENTS; s++) {
@@ -143,9 +148,13 @@ void VP8InitFilter(VP8EncIterator* const it) {
     }
     VP8SSIMDspInit();
   }
+#else
+  (void)it;
+#endif
 }
 
 void VP8StoreFilterStats(VP8EncIterator* const it) {
+#if !defined(WEBP_REDUCE_SIZE)
   int d;
   VP8Encoder* const enc = it->enc_;
   const int s = it->mb_->segment_;
@@ -177,10 +186,14 @@ void VP8StoreFilterStats(VP8EncIterator* const it) {
     DoFilter(it, level);
     (*it->lf_stats_)[s][level] += GetMBSSIM(it->yuv_in_, it->yuv_out2_);
   }
+#else  // defined(WEBP_REDUCE_SIZE)
+  (void)it;
+#endif  // !defined(WEBP_REDUCE_SIZE)
 }
 
 void VP8AdjustFilterStrength(VP8EncIterator* const it) {
   VP8Encoder* const enc = it->enc_;
+#if !defined(WEBP_REDUCE_SIZE)
   if (it->lf_stats_ != NULL) {
     int s;
     for (s = 0; s < NUM_MB_SEGMENTS; s++) {
@@ -196,7 +209,10 @@ void VP8AdjustFilterStrength(VP8EncIterator* const it) {
       }
       enc->dqm_[s].fstrength_ = best_level;
     }
-  } else if (enc->config_->filter_strength > 0) {
+    return;
+  }
+#endif  // !defined(WEBP_REDUCE_SIZE)
+  if (enc->config_->filter_strength > 0) {
     int max_level = 0;
     int s;
     for (s = 0; s < NUM_MB_SEGMENTS; s++) {
diff --git a/thirdparty/libwebp/enc/frame_enc.c b/thirdparty/libwebp/src/enc/frame_enc.c
index abef523bbf..2b0dc66410 100644
--- a/thirdparty/libwebp/enc/frame_enc.c
+++ b/thirdparty/libwebp/src/enc/frame_enc.c
@@ -14,10 +14,10 @@
 #include <string.h>
 #include <math.h>
 
-#include "./cost_enc.h"
-#include "./vp8i_enc.h"
-#include "../dsp/dsp.h"
-#include "../webp/format_constants.h"  // RIFF constants
+#include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/dsp/dsp.h"
+#include "src/webp/format_constants.h"  // RIFF constants
 
 #define SEGMENT_VISU 0
 #define DEBUG_SEARCH 0    // useful to track search convergence
@@ -200,11 +200,13 @@ static void SetSegmentProbas(VP8Encoder* const enc) {
     const VP8MBInfo* const mb = &enc->mb_info_[n];
     p[mb->segment_]++;
   }
+#if !defined(WEBP_DISABLE_STATS)
   if (enc->pic_->stats != NULL) {
     for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
       enc->pic_->stats->segment_size[n] = p[n];
     }
   }
+#endif
   if (enc->segment_hdr_.num_segments_ > 1) {
     uint8_t* const probas = enc->proba_.segments_;
     probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
@@ -452,6 +454,8 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
 //------------------------------------------------------------------------------
 // ExtraInfo map / Debug function
 
+#if !defined(WEBP_DISABLE_STATS)
+
 #if SEGMENT_VISU
 static void SetBlock(uint8_t* p, int value, int size) {
   int y;
@@ -516,6 +520,20 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
 #endif
 }
 
+#else  // defined(WEBP_DISABLE_STATS)
+static void ResetSSE(VP8Encoder* const enc) {
+  (void)enc;
+}
+static void StoreSideInfo(const VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  WebPPicture* const pic = enc->pic_;
+  if (pic->extra_info != NULL) {
+    memset(pic->extra_info, 0,
+           enc->mb_w_ * enc->mb_h_ * sizeof(*pic->extra_info));
+  }
+}
+#endif  // !defined(WEBP_DISABLE_STATS)
+
 static double GetPSNR(uint64_t mse, uint64_t size) {
   return (mse > 0 && size > 0) ? 10. * log10(255. * 255. * size / mse) : 99;
 }
@@ -640,7 +658,7 @@ static int StatLoop(VP8Encoder* const enc) {
 // Main loops
 //
 
-static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
+static const uint8_t kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
 
 static int PreLoopInitialize(VP8Encoder* const enc) {
   int p;
@@ -670,6 +688,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
   }
 
   if (ok) {      // All good. Finish up.
+#if !defined(WEBP_DISABLE_STATS)
     if (enc->pic_->stats != NULL) {  // finalize byte counters...
       int i, s;
       for (i = 0; i <= 2; ++i) {
@@ -678,6 +697,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
         }
       }
     }
+#endif
     VP8AdjustFilterStrength(it);     // ...and store filter stats.
   } else {
     // Something bad happened -> need to do some memory cleanup.
diff --git a/thirdparty/libwebp/enc/histogram_enc.c b/thirdparty/libwebp/src/enc/histogram_enc.c
index 808b6f78ab..056a972dda 100644
--- a/thirdparty/libwebp/enc/histogram_enc.c
+++ b/thirdparty/libwebp/src/enc/histogram_enc.c
@@ -10,16 +10,16 @@
 // Author: Jyrki Alakuijala (jyrki@google.com)
 //
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
 #include <math.h>
 
-#include "./backward_references_enc.h"
-#include "./histogram_enc.h"
-#include "../dsp/lossless.h"
-#include "../dsp/lossless_common.h"
-#include "../utils/utils.h"
+#include "src/enc/backward_references_enc.h"
+#include "src/enc/histogram_enc.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
+#include "src/utils/utils.h"
 
 #define MAX_COST 1.e38
 
@@ -76,7 +76,7 @@ void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
                             VP8LHistogram* const histo) {
   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
   while (VP8LRefsCursorOk(&c)) {
-    VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
+    VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos, NULL, 0);
     VP8LRefsCursorNext(&c);
   }
 }
@@ -138,7 +138,9 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
 // -----------------------------------------------------------------------------
 
 void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
-                                     const PixOrCopy* const v) {
+                                     const PixOrCopy* const v,
+                                     int (*const distance_modifier)(int, int),
+                                     int distance_modifier_arg0) {
   if (PixOrCopyIsLiteral(v)) {
     ++histo->alpha_[PixOrCopyLiteral(v, 3)];
     ++histo->red_[PixOrCopyLiteral(v, 2)];
@@ -152,7 +154,13 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
     int code, extra_bits;
     VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits);
     ++histo->literal_[NUM_LITERAL_CODES + code];
-    VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
+    if (distance_modifier == NULL) {
+      VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
+    } else {
+      VP8LPrefixEncodeBits(
+          distance_modifier(distance_modifier_arg0, PixOrCopyDistance(v)),
+          &code, &extra_bits);
+    }
     ++histo->distance_[code];
   }
 }
@@ -473,7 +481,7 @@ static void HistogramBuild(
   while (VP8LRefsCursorOk(&c)) {
     const PixOrCopy* const v = c.cur_pos;
     const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
-    VP8LHistogramAddSinglePixOrCopy(histograms[ix], v);
+    VP8LHistogramAddSinglePixOrCopy(histograms[ix], v, NULL, 0);
     x += PixOrCopyLength(v);
     while (x >= xsize) {
       x -= xsize;
@@ -523,11 +531,12 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
 
 // Compact image_histo[] by merging some histograms with same bin_id together if
 // it's advantageous.
-static VP8LHistogram* HistogramCombineEntropyBin(
-    VP8LHistogramSet* const image_histo,
-    VP8LHistogram* cur_combo,
-    const uint16_t* const bin_map, int bin_map_size, int num_bins,
-    double combine_cost_factor, int low_effort) {
+static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
+                                       VP8LHistogram* cur_combo,
+                                       const uint16_t* const bin_map,
+                                       int bin_map_size, int num_bins,
+                                       double combine_cost_factor,
+                                       int low_effort) {
   VP8LHistogram** const histograms = image_histo->histograms;
   int idx;
   // Work in-place: processed histograms are put at the beginning of
@@ -593,14 +602,13 @@ static VP8LHistogram* HistogramCombineEntropyBin(
       UpdateHistogramCost(histograms[idx]);
     }
   }
-  return cur_combo;
 }
 
+// Implement a Lehmer random number generator with a multiplicative constant of
+// 48271 and a modulo constant of 2^31 - 1.
 static uint32_t MyRand(uint32_t* const seed) {
-  *seed = (*seed * 16807ull) & 0xffffffffu;
-  if (*seed == 0) {
-    *seed = 1;
-  }
+  *seed = (uint32_t)(((uint64_t)(*seed) * 48271u) % 2147483647u);
+  assert(*seed > 0);
   return *seed;
 }
 
@@ -641,57 +649,75 @@ static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) {
 static void HistoQueueClear(HistoQueue* const histo_queue) {
   assert(histo_queue != NULL);
   WebPSafeFree(histo_queue->queue);
+  histo_queue->size = 0;
+  histo_queue->max_size = 0;
 }
 
-static void SwapHistogramPairs(HistogramPair *p1,
-                               HistogramPair *p2) {
-  const HistogramPair tmp = *p1;
-  *p1 = *p2;
-  *p2 = tmp;
+// Pop a specific pair in the queue by replacing it with the last one
+// and shrinking the queue.
+static void HistoQueuePopPair(HistoQueue* const histo_queue,
+                              HistogramPair* const pair) {
+  assert(pair >= histo_queue->queue &&
+         pair < (histo_queue->queue + histo_queue->size));
+  assert(histo_queue->size > 0);
+  *pair = histo_queue->queue[histo_queue->size - 1];
+  --histo_queue->size;
 }
 
-// Given a valid priority queue in range [0, queue_size) this function checks
-// whether histo_queue[queue_size] should be accepted and swaps it with the
-// front if it is smaller. Otherwise, it leaves it as is.
-static void UpdateQueueFront(HistoQueue* const histo_queue) {
-  if (histo_queue->queue[histo_queue->size].cost_diff >= 0) return;
-
-  if (histo_queue->queue[histo_queue->size].cost_diff <
-      histo_queue->queue[0].cost_diff) {
-    SwapHistogramPairs(histo_queue->queue,
-                       histo_queue->queue + histo_queue->size);
+// Check whether a pair in the queue should be updated as head or not.
+static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
+                                 HistogramPair* const pair) {
+  assert(pair->cost_diff < 0.);
+  assert(pair >= histo_queue->queue &&
+         pair < (histo_queue->queue + histo_queue->size));
+  assert(histo_queue->size > 0);
+  if (pair->cost_diff < histo_queue->queue[0].cost_diff) {
+    // Replace the best pair.
+    const HistogramPair tmp = histo_queue->queue[0];
+    histo_queue->queue[0] = *pair;
+    *pair = tmp;
   }
-  ++histo_queue->size;
-
-  // We cannot add more elements than the capacity.
-  // The allocation adds an extra element to the official capacity so that
-  // histo_queue->queue[histo_queue->max_size] is read/written within bound.
-  assert(histo_queue->size <= histo_queue->max_size);
 }
 
-// -----------------------------------------------------------------------------
-
-static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2,
-                        HistogramPair* const pair) {
-  VP8LHistogram* h1;
-  VP8LHistogram* h2;
+// Create a pair from indices "idx1" and "idx2" and push it to the queue,
+// provided its cost is strictly inferior to "threshold" (which must be <= 0).
+// Returns the cost of the pair, or 0. if it is not inferior to "threshold".
+static double HistoQueuePush(HistoQueue* const histo_queue,
+                             VP8LHistogram** const histograms, int idx1,
+                             int idx2, double threshold) {
+  const VP8LHistogram* h1;
+  const VP8LHistogram* h2;
+  HistogramPair pair;
   double sum_cost;
 
+  assert(threshold <= 0.);
   if (idx1 > idx2) {
     const int tmp = idx2;
     idx2 = idx1;
     idx1 = tmp;
   }
-  pair->idx1 = idx1;
-  pair->idx2 = idx2;
+  pair.idx1 = idx1;
+  pair.idx2 = idx2;
   h1 = histograms[idx1];
   h2 = histograms[idx2];
   sum_cost = h1->bit_cost_ + h2->bit_cost_;
-  pair->cost_combo = 0.;
-  GetCombinedHistogramEntropy(h1, h2, sum_cost, &pair->cost_combo);
-  pair->cost_diff = pair->cost_combo - sum_cost;
+  pair.cost_combo = 0.;
+  GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair.cost_combo);
+  pair.cost_diff = pair.cost_combo - sum_cost;
+
+  // Do not even consider the pair if it does not improve the entropy.
+  if (pair.cost_diff >= threshold) return 0.;
+
+  // We cannot add more elements than the capacity.
+  assert(histo_queue->size < histo_queue->max_size);
+  histo_queue->queue[histo_queue->size++] = pair;
+  HistoQueueUpdateHead(histo_queue, &histo_queue->queue[histo_queue->size - 1]);
+
+  return pair.cost_diff;
 }
 
+// -----------------------------------------------------------------------------
+
 // Combines histograms by continuously choosing the one with the highest cost
 // reduction.
 static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {
@@ -714,13 +740,11 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {
     clusters[i] = i;
     for (j = i + 1; j < image_histo_size; ++j) {
       // Initialize positions array.
-      PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size]);
-      UpdateQueueFront(&histo_queue);
+      HistoQueuePush(&histo_queue, histograms, i, j, 0.);
     }
   }
 
   while (image_histo_size > 1 && histo_queue.size > 0) {
-    HistogramPair* copy_to;
     const int idx1 = histo_queue.queue[0].idx1;
     const int idx2 = histo_queue.queue[0].idx2;
     HistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);
@@ -733,31 +757,22 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {
     }
     --image_histo_size;
 
-    // Remove pairs intersecting the just combined best pair. This will
-    // therefore pop the head of the queue.
-    copy_to = histo_queue.queue;
-    for (i = 0; i < histo_queue.size; ++i) {
+    // Remove pairs intersecting the just combined best pair.
+    for (i = 0; i < histo_queue.size;) {
       HistogramPair* const p = histo_queue.queue + i;
       if (p->idx1 == idx1 || p->idx2 == idx1 ||
           p->idx1 == idx2 || p->idx2 == idx2) {
-        // Do not copy the invalid pair.
-        continue;
-      }
-      if (p->cost_diff < histo_queue.queue[0].cost_diff) {
-        // Replace the top of the queue if we found better.
-        SwapHistogramPairs(histo_queue.queue, p);
+        HistoQueuePopPair(&histo_queue, p);
+      } else {
+        HistoQueueUpdateHead(&histo_queue, p);
+        ++i;
       }
-      SwapHistogramPairs(copy_to, p);
-      ++copy_to;
     }
-    histo_queue.size = (int)(copy_to - histo_queue.queue);
 
     // Push new pairs formed with combined histogram to the queue.
     for (i = 0; i < image_histo_size; ++i) {
       if (clusters[i] != idx1) {
-        PreparePair(histograms, idx1, clusters[i],
-                    &histo_queue.queue[histo_queue.size]);
-        UpdateQueueFront(&histo_queue);
+        HistoQueuePush(&histo_queue, histograms, idx1, clusters[i], 0.);
       }
     }
   }
@@ -777,90 +792,130 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {
   return ok;
 }
 
-static void HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
-                                       VP8LHistogram* tmp_histo,
-                                       VP8LHistogram* best_combo,
-                                       int quality, int min_cluster_size) {
+// Perform histogram aggregation using a stochastic approach.
+// 'do_greedy' is set to 1 if a greedy approach needs to be performed
+// afterwards, 0 otherwise.
+static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
+                                      int min_cluster_size,
+                                      int* const do_greedy) {
   int iter;
-  uint32_t seed = 0;
+  uint32_t seed = 1;
   int tries_with_no_success = 0;
   int image_histo_size = image_histo->size;
-  const int iter_mult = (quality < 25) ? 2 : 2 + (quality - 25) / 8;
-  const int outer_iters = image_histo_size * iter_mult;
-  const int num_pairs = image_histo_size / 2;
+  const int outer_iters = image_histo_size;
   const int num_tries_no_success = outer_iters / 2;
-  int idx2_max = image_histo_size - 1;
-  int do_brute_dorce = 0;
   VP8LHistogram** const histograms = image_histo->histograms;
+  // Priority queue of histogram pairs. Its size of "kHistoQueueSizeSqrt"^2
+  // impacts the quality of the compression and the speed: the smaller the
+  // faster but the worse for the compression.
+  HistoQueue histo_queue;
+  const int kHistoQueueSizeSqrt = 3;
+  int ok = 0;
 
+  if (!HistoQueueInit(&histo_queue, kHistoQueueSizeSqrt)) {
+    goto End;
+  }
   // Collapse similar histograms in 'image_histo'.
   ++min_cluster_size;
-  for (iter = 0;
-       iter < outer_iters && image_histo_size >= min_cluster_size;
+  for (iter = 0; iter < outer_iters && image_histo_size >= min_cluster_size &&
+                 ++tries_with_no_success < num_tries_no_success;
        ++iter) {
-    double best_cost_diff = 0.;
+    double best_cost =
+        (histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff;
     int best_idx1 = -1, best_idx2 = 1;
     int j;
-    int num_tries =
-        (num_pairs < image_histo_size) ? num_pairs : image_histo_size;
-    // Use a brute force approach if:
-    // - stochastic has not worked for a while and
-    // - if the number of iterations for brute force is less than the number of
-    // iterations if we never find a match ever again stochastically (hence
-    // num_tries times the number of remaining outer iterations).
-    do_brute_dorce =
-        (tries_with_no_success > 10) &&
-        (idx2_max * (idx2_max + 1) < 2 * num_tries * (outer_iters - iter));
-    if (do_brute_dorce) num_tries = idx2_max;
-
-    seed += iter;
-    for (j = 0; j < num_tries; ++j) {
-      double curr_cost_diff;
-      // Choose two histograms at random and try to combine them.
-      uint32_t idx1, idx2;
-      if (do_brute_dorce) {
-        // Use a brute force approach.
-        idx1 = (uint32_t)j;
-        idx2 = (uint32_t)idx2_max;
-      } else {
-        const uint32_t tmp = (j & 7) + 1;
-        const uint32_t diff =
-            (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1);
-        idx1 = MyRand(&seed) % image_histo_size;
-        idx2 = (idx1 + diff + 1) % image_histo_size;
-        if (idx1 == idx2) {
-          continue;
-        }
-      }
+    const uint32_t rand_range = (image_histo_size - 1) * image_histo_size;
+    // image_histo_size / 2 was chosen empirically. Less means faster but worse
+    // compression.
+    const int num_tries = image_histo_size / 2;
 
-      // Calculate cost reduction on combining.
-      curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2],
-                                        tmp_histo, best_cost_diff);
-      if (curr_cost_diff < best_cost_diff) {  // found a better pair?
-        HistogramSwap(&best_combo, &tmp_histo);
-        best_cost_diff = curr_cost_diff;
-        best_idx1 = idx1;
-        best_idx2 = idx2;
+    for (j = 0; j < num_tries; ++j) {
+      double curr_cost;
+      // Choose two different histograms at random and try to combine them.
+      const uint32_t tmp = MyRand(&seed) % rand_range;
+      const uint32_t idx1 = tmp / (image_histo_size - 1);
+      uint32_t idx2 = tmp % (image_histo_size - 1);
+      if (idx2 >= idx1) ++idx2;
+
+      // Calculate cost reduction on combination.
+      curr_cost =
+          HistoQueuePush(&histo_queue, histograms, idx1, idx2, best_cost);
+      if (curr_cost < 0) {  // found a better pair?
+        best_cost = curr_cost;
+        // Empty the queue if we reached full capacity.
+        if (histo_queue.size == histo_queue.max_size) break;
       }
     }
-    if (do_brute_dorce) --idx2_max;
-
-    if (best_idx1 >= 0) {
-      HistogramSwap(&best_combo, &histograms[best_idx1]);
-      // swap best_idx2 slot with last one (which is now unused)
-      --image_histo_size;
-      if (idx2_max >= image_histo_size) idx2_max = image_histo_size - 1;
-      if (best_idx2 != image_histo_size) {
-        HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]);
-        histograms[image_histo_size] = NULL;
-      }
-      tries_with_no_success = 0;
+    if (histo_queue.size == 0) continue;
+
+    // Merge the two best histograms.
+    best_idx1 = histo_queue.queue[0].idx1;
+    best_idx2 = histo_queue.queue[0].idx2;
+    assert(best_idx1 < best_idx2);
+    HistogramAddEval(histograms[best_idx1], histograms[best_idx2],
+                     histograms[best_idx1], 0);
+    // Swap the best_idx2 histogram with the last one (which is now unused).
+    --image_histo_size;
+    if (best_idx2 != image_histo_size) {
+      HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]);
     }
-    if (++tries_with_no_success >= num_tries_no_success || idx2_max == 0) {
-      break;
+    histograms[image_histo_size] = NULL;
+    // Parse the queue and update each pair that deals with best_idx1,
+    // best_idx2 or image_histo_size.
+    for (j = 0; j < histo_queue.size;) {
+      HistogramPair* const p = histo_queue.queue + j;
+      const int is_idx1_best = p->idx1 == best_idx1 || p->idx1 == best_idx2;
+      const int is_idx2_best = p->idx2 == best_idx1 || p->idx2 == best_idx2;
+      int do_eval = 0;
+      // The front pair could have been duplicated by a random pick, so
+      // always check for such duplicates and remove them.
+      if (is_idx1_best && is_idx2_best) {
+        HistoQueuePopPair(&histo_queue, p);
+        continue;
+      }
+      // Any pair containing one of the two best indices should only refer to
+      // best_idx1. Its cost should also be updated.
+      if (is_idx1_best) {
+        p->idx1 = best_idx1;
+        do_eval = 1;
+      } else if (is_idx2_best) {
+        p->idx2 = best_idx1;
+        do_eval = 1;
+      }
+      if (p->idx2 == image_histo_size) {
+        // No need to re-evaluate here as it does not involve a pair
+        // containing best_idx1 or best_idx2.
+        p->idx2 = best_idx2;
+      }
+      assert(p->idx2 < image_histo_size);
+      // Make sure the index order is respected.
+      if (p->idx1 > p->idx2) {
+        const int tmp = p->idx2;
+        p->idx2 = p->idx1;
+        p->idx1 = tmp;
+      }
+      if (do_eval) {
+        // Re-evaluate the cost of an updated pair.
+        GetCombinedHistogramEntropy(histograms[p->idx1], histograms[p->idx2], 0,
+                                    &p->cost_diff);
+        if (p->cost_diff >= 0.) {
+          HistoQueuePopPair(&histo_queue, p);
+          continue;
+        }
+      }
+      HistoQueueUpdateHead(&histo_queue, p);
+      ++j;
     }
+
+    tries_with_no_success = 0;
   }
   image_histo->size = image_histo_size;
+  *do_greedy = (image_histo->size <= min_cluster_size);
+  ok = 1;
+
+End:
+  HistoQueueClear(&histo_queue);
+  return ok;
 }
 
 // -----------------------------------------------------------------------------
@@ -925,7 +980,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
                              int quality, int low_effort,
                              int histo_bits, int cache_bits,
                              VP8LHistogramSet* const image_histo,
-                             VP8LHistogramSet* const tmp_histos,
+                             VP8LHistogram* const tmp_histo,
                              uint16_t* const histogram_symbols) {
   int ok = 0;
   const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
@@ -933,7 +988,6 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
   const int image_histo_raw_size = histo_xsize * histo_ysize;
   VP8LHistogramSet* const orig_histo =
       VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
-  VP8LHistogram* cur_combo;
   // Don't attempt linear bin-partition heuristic for
   // histograms of small sizes (as bin_map will be very sparse) and
   // maximum quality q==100 (to preserve the compression gains at that level).
@@ -948,7 +1002,6 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
   // Copies the histograms and computes its bit_cost.
   HistogramCopyAndAnalyze(orig_histo, image_histo);
 
-  cur_combo = tmp_histos->histograms[1];  // pick up working slot
   if (entropy_combine) {
     const int bin_map_size = orig_histo->size;
     // Reuse histogram_symbols storage. By definition, it's guaranteed to be ok.
@@ -958,10 +1011,9 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
 
     HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
     // Collapse histograms with similar entropy.
-    cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo,
-                                           bin_map, bin_map_size,
-                                           entropy_combine_num_bins,
-                                           combine_cost_factor, low_effort);
+    HistogramCombineEntropyBin(image_histo, tmp_histo, bin_map, bin_map_size,
+                               entropy_combine_num_bins, combine_cost_factor,
+                               low_effort);
   }
 
   // Don't combine the histograms using stochastic and greedy heuristics for
@@ -970,10 +1022,11 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
     const float x = quality / 100.f;
     // cubic ramp between 1 and MAX_HISTO_GREEDY:
     const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
-    HistogramCombineStochastic(image_histo, tmp_histos->histograms[0],
-                               cur_combo, quality, threshold_size);
-    if ((image_histo->size <= threshold_size) &&
-        !HistogramCombineGreedy(image_histo)) {
+    int do_greedy;
+    if (!HistogramCombineStochastic(image_histo, threshold_size, &do_greedy)) {
+      goto Error;
+    }
+    if (do_greedy && !HistogramCombineGreedy(image_histo)) {
       goto Error;
     }
   }
diff --git a/thirdparty/libwebp/enc/histogram_enc.h b/thirdparty/libwebp/src/enc/histogram_enc.h
index a9d258a166..15b1fbda34 100644
--- a/thirdparty/libwebp/enc/histogram_enc.h
+++ b/thirdparty/libwebp/src/enc/histogram_enc.h
@@ -11,14 +11,14 @@
 //
 // Models the histograms of literal and distance codes.
 
-#ifndef WEBP_ENC_HISTOGRAM_H_
-#define WEBP_ENC_HISTOGRAM_H_
+#ifndef WEBP_ENC_HISTOGRAM_ENC_H_
+#define WEBP_ENC_HISTOGRAM_ENC_H_
 
 #include <string.h>
 
-#include "./backward_references_enc.h"
-#include "../webp/format_constants.h"
-#include "../webp/types.h"
+#include "src/enc/backward_references_enc.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -90,7 +90,9 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits);
 
 // Accumulate a token 'v' into a histogram.
 void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
-                                     const PixOrCopy* const v);
+                                     const PixOrCopy* const v,
+                                     int (*const distance_modifier)(int, int),
+                                     int distance_modifier_arg0);
 
 static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
   return NUM_LITERAL_CODES + NUM_LENGTH_CODES +
@@ -103,7 +105,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
                              int quality, int low_effort,
                              int histogram_bits, int cache_bits,
                              VP8LHistogramSet* const image_in,
-                             VP8LHistogramSet* const tmp_histos,
+                             VP8LHistogram* const tmp_histo,
                              uint16_t* const histogram_symbols);
 
 // Returns the entropy for the symbols in the input array.
@@ -120,4 +122,4 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
 }
 #endif
 
-#endif  // WEBP_ENC_HISTOGRAM_H_
+#endif  // WEBP_ENC_HISTOGRAM_ENC_H_
diff --git a/thirdparty/libwebp/enc/iterator_enc.c b/thirdparty/libwebp/src/enc/iterator_enc.c
index e48d30bd31..cfacfd2401 100644
--- a/thirdparty/libwebp/enc/iterator_enc.c
+++ b/thirdparty/libwebp/src/enc/iterator_enc.c
@@ -13,7 +13,7 @@
 
 #include <string.h>
 
-#include "./vp8i_enc.h"
+#include "src/enc/vp8i_enc.h"
 
 //------------------------------------------------------------------------------
 // VP8Iterator
diff --git a/thirdparty/libwebp/enc/near_lossless_enc.c b/thirdparty/libwebp/src/enc/near_lossless_enc.c
index 2bd03ab20d..cadd14c664 100644
--- a/thirdparty/libwebp/enc/near_lossless_enc.c
+++ b/thirdparty/libwebp/src/enc/near_lossless_enc.c
@@ -17,18 +17,20 @@
 #include <assert.h>
 #include <stdlib.h>
 
-#include "../dsp/lossless_common.h"
-#include "../utils/utils.h"
-#include "./vp8i_enc.h"
+#include "src/dsp/lossless_common.h"
+#include "src/utils/utils.h"
+#include "src/enc/vp8li_enc.h"
+
+#if (WEBP_NEAR_LOSSLESS == 1)
 
 #define MIN_DIM_FOR_NEAR_LOSSLESS 64
 #define MAX_LIMIT_BITS             5
 
 // Quantizes the value up or down to a multiple of 1<<bits (or to 255),
 // choosing the closer one, resolving ties using bankers' rounding.
-static int FindClosestDiscretized(int a, int bits) {
-  const int mask = (1 << bits) - 1;
-  const int biased = a + (mask >> 1) + ((a >> bits) & 1);
+static uint32_t FindClosestDiscretized(uint32_t a, int bits) {
+  const uint32_t mask = (1u << bits) - 1;
+  const uint32_t biased = a + (mask >> 1) + ((a >> bits) & 1);
   assert(bits > 0);
   if (biased > 0xff) return 0xff;
   return biased & ~mask;
@@ -69,22 +71,30 @@ static int IsSmooth(const uint32_t* const prev_row,
 }
 
 // Adjusts pixel values of image with given maximum error.
-static void NearLossless(int xsize, int ysize, uint32_t* argb,
-                         int limit_bits, uint32_t* copy_buffer) {
+static void NearLossless(int xsize, int ysize, const uint32_t* argb_src,
+                         int stride, int limit_bits, uint32_t* copy_buffer,
+                         uint32_t* argb_dst) {
   int x, y;
   const int limit = 1 << limit_bits;
   uint32_t* prev_row = copy_buffer;
   uint32_t* curr_row = prev_row + xsize;
   uint32_t* next_row = curr_row + xsize;
-  memcpy(copy_buffer, argb, xsize * 2 * sizeof(argb[0]));
+  memcpy(curr_row, argb_src, xsize * sizeof(argb_src[0]));
+  memcpy(next_row, argb_src + stride, xsize * sizeof(argb_src[0]));
 
-  for (y = 1; y < ysize - 1; ++y) {
-    uint32_t* const curr_argb_row = argb + y * xsize;
-    uint32_t* const next_argb_row = curr_argb_row + xsize;
-    memcpy(next_row, next_argb_row, xsize * sizeof(argb[0]));
-    for (x = 1; x < xsize - 1; ++x) {
-      if (!IsSmooth(prev_row, curr_row, next_row, x, limit)) {
-        curr_argb_row[x] = ClosestDiscretizedArgb(curr_row[x], limit_bits);
+  for (y = 0; y < ysize; ++y, argb_src += stride, argb_dst += xsize) {
+    if (y == 0 || y == ysize - 1) {
+      memcpy(argb_dst, argb_src, xsize * sizeof(argb_src[0]));
+    } else {
+      memcpy(next_row, argb_src + stride, xsize * sizeof(argb_src[0]));
+      argb_dst[0] = argb_src[0];
+      argb_dst[xsize - 1] = argb_src[xsize - 1];
+      for (x = 1; x < xsize - 1; ++x) {
+        if (IsSmooth(prev_row, curr_row, next_row, x, limit)) {
+          argb_dst[x] = curr_row[x];
+        } else {
+          argb_dst[x] = ClosestDiscretizedArgb(curr_row[x], limit_bits);
+        }
       }
     }
     {
@@ -97,26 +107,45 @@ static void NearLossless(int xsize, int ysize, uint32_t* argb,
   }
 }
 
-int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) {
+int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,
+                         uint32_t* const argb_dst) {
   int i;
+  const int xsize = picture->width;
+  const int ysize = picture->height;
+  const int stride = picture->argb_stride;
   uint32_t* const copy_buffer =
       (uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer));
   const int limit_bits = VP8LNearLosslessBits(quality);
-  assert(argb != NULL);
-  assert(limit_bits >= 0);
+  assert(argb_dst != NULL);
+  assert(limit_bits > 0);
   assert(limit_bits <= MAX_LIMIT_BITS);
   if (copy_buffer == NULL) {
     return 0;
   }
   // For small icon images, don't attempt to apply near-lossless compression.
-  if (xsize < MIN_DIM_FOR_NEAR_LOSSLESS && ysize < MIN_DIM_FOR_NEAR_LOSSLESS) {
+  if ((xsize < MIN_DIM_FOR_NEAR_LOSSLESS &&
+       ysize < MIN_DIM_FOR_NEAR_LOSSLESS) ||
+      ysize < 3) {
+    for (i = 0; i < ysize; ++i) {
+      memcpy(argb_dst + i * xsize, picture->argb + i * picture->argb_stride,
+             xsize * sizeof(*argb_dst));
+    }
     WebPSafeFree(copy_buffer);
     return 1;
   }
 
-  for (i = limit_bits; i != 0; --i) {
-    NearLossless(xsize, ysize, argb, i, copy_buffer);
+  NearLossless(xsize, ysize, picture->argb, stride, limit_bits, copy_buffer,
+               argb_dst);
+  for (i = limit_bits - 1; i != 0; --i) {
+    NearLossless(xsize, ysize, argb_dst, xsize, i, copy_buffer, argb_dst);
   }
   WebPSafeFree(copy_buffer);
   return 1;
 }
+#else  // (WEBP_NEAR_LOSSLESS == 1)
+
+// Define a stub to suppress compiler warnings.
+extern void VP8LNearLosslessStub(void);
+WEBP_TSAN_IGNORE_FUNCTION void VP8LNearLosslessStub(void) {}
+
+#endif  // (WEBP_NEAR_LOSSLESS == 1)
diff --git a/thirdparty/libwebp/enc/picture_csp_enc.c b/thirdparty/libwebp/src/enc/picture_csp_enc.c
index e5d1c75a66..d531dd0282 100644
--- a/thirdparty/libwebp/enc/picture_csp_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_csp_enc.c
@@ -15,10 +15,12 @@
 #include <stdlib.h>
 #include <math.h>
 
-#include "./vp8i_enc.h"
-#include "../utils/random_utils.h"
-#include "../utils/utils.h"
-#include "../dsp/yuv.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/utils/random_utils.h"
+#include "src/utils/utils.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/yuv.h"
 
 // Uncomment to disable gamma-compression during RGB->U/V averaging
 #define USE_GAMMA_COMPRESSION
@@ -39,12 +41,15 @@ static const union {
 static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
                           int x_step, int y_step) {
   if (alpha == NULL) return 0;
-  while (height-- > 0) {
-    int x;
-    for (x = 0; x < width * x_step; x += x_step) {
-      if (alpha[x] != 0xff) return 1;  // TODO(skal): check 4/8 bytes at a time.
+  WebPInitAlphaProcessing();
+  if (x_step == 1) {
+    for (; height-- > 0; alpha += y_step) {
+      if (WebPHasAlpha8b(alpha, width)) return 1;
+    }
+  } else {
+    for (; height-- > 0; alpha += y_step) {
+      if (WebPHasAlpha32b(alpha, width)) return 1;
     }
-    alpha += y_step;
   }
   return 0;
 }
@@ -56,15 +61,10 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {
     return CheckNonOpaque(picture->a, picture->width, picture->height,
                           1, picture->a_stride);
   } else {
-    int x, y;
-    const uint32_t* argb = picture->argb;
-    if (argb == NULL) return 0;
-    for (y = 0; y < picture->height; ++y) {
-      for (x = 0; x < picture->width; ++x) {
-        if (argb[x] < 0xff000000u) return 1;   // test any alpha values != 0xff
-      }
-      argb += picture->argb_stride;
-    }
+    const int alpha_offset = ALPHA_IS_LAST ? 3 : 0;
+    return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset,
+                          picture->width, picture->height,
+                          4, picture->argb_stride * sizeof(*picture->argb));
   }
   return 0;
 }
@@ -171,7 +171,7 @@ typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W
 #if defined(USE_GAMMA_COMPRESSION)
 
 // float variant of gamma-correction
-// We use tables of different size and precision for the Rec709
+// We use tables of different size and precision for the Rec709 / BT2020
 // transfer function.
 #define kGammaF (1./0.45)
 static float kGammaToLinearTabF[MAX_Y_T + 1];   // size scales with Y_FIX
@@ -183,8 +183,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {
     int v;
     const double norm = 1. / MAX_Y_T;
     const double scale = 1. / kGammaTabSize;
-    const double a = 0.099;
-    const double thresh = 0.018;
+    const double a = 0.09929682680944;
+    const double thresh = 0.018053968510807;
     for (v = 0; v <= MAX_Y_T; ++v) {
       const double g = norm * v;
       if (g <= thresh * 4.5) {
@@ -856,7 +856,6 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
     return 0;
   }
   if (has_alpha) {
-    WebPInitAlphaProcessing();
     assert(step == 4);
 #if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
     assert(kAlphaFix + kGammaFix <= 31);
@@ -1085,40 +1084,45 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
 // automatic import / conversion
 
 static int Import(WebPPicture* const picture,
-                  const uint8_t* const rgb, int rgb_stride,
+                  const uint8_t* rgb, int rgb_stride,
                   int step, int swap_rb, int import_alpha) {
   int y;
   const uint8_t* r_ptr = rgb + (swap_rb ? 2 : 0);
   const uint8_t* g_ptr = rgb + 1;
   const uint8_t* b_ptr = rgb + (swap_rb ? 0 : 2);
-  const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL;
   const int width = picture->width;
   const int height = picture->height;
 
   if (!picture->use_argb) {
+    const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL;
     return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
                               0.f /* no dithering */, 0, picture);
   }
   if (!WebPPictureAlloc(picture)) return 0;
 
-  VP8EncDspARGBInit();
+  VP8LDspInit();
+  WebPInitAlphaProcessing();
 
   if (import_alpha) {
     uint32_t* dst = picture->argb;
+    const int do_copy =
+        (!swap_rb && !ALPHA_IS_LAST) || (swap_rb && ALPHA_IS_LAST);
     assert(step == 4);
     for (y = 0; y < height; ++y) {
-      VP8PackARGB(a_ptr, r_ptr, g_ptr, b_ptr, width, dst);
-      a_ptr += rgb_stride;
-      r_ptr += rgb_stride;
-      g_ptr += rgb_stride;
-      b_ptr += rgb_stride;
+      if (do_copy) {
+        memcpy(dst, rgb, width * 4);
+      } else {
+        // RGBA input order. Need to swap R and B.
+        VP8LConvertBGRAToRGBA((const uint32_t*)rgb, width, (uint8_t*)dst);
+      }
+      rgb += rgb_stride;
       dst += picture->argb_stride;
     }
   } else {
     uint32_t* dst = picture->argb;
     assert(step >= 3);
     for (y = 0; y < height; ++y) {
-      VP8PackRGB(r_ptr, g_ptr, b_ptr, width, step, dst);
+      WebPPackRGB(r_ptr, g_ptr, b_ptr, width, step, dst);
       r_ptr += rgb_stride;
       g_ptr += rgb_stride;
       b_ptr += rgb_stride;
@@ -1130,12 +1134,7 @@ static int Import(WebPPicture* const picture,
 
 // Public API
 
-int WebPPictureImportRGB(WebPPicture* picture,
-                         const uint8_t* rgb, int rgb_stride) {
-  return (picture != NULL && rgb != NULL)
-             ? Import(picture, rgb, rgb_stride, 3, 0, 0)
-             : 0;
-}
+#if !defined(WEBP_REDUCE_CSP)
 
 int WebPPictureImportBGR(WebPPicture* picture,
                          const uint8_t* rgb, int rgb_stride) {
@@ -1144,31 +1143,41 @@ int WebPPictureImportBGR(WebPPicture* picture,
              : 0;
 }
 
-int WebPPictureImportRGBA(WebPPicture* picture,
+int WebPPictureImportBGRA(WebPPicture* picture,
                           const uint8_t* rgba, int rgba_stride) {
   return (picture != NULL && rgba != NULL)
-             ? Import(picture, rgba, rgba_stride, 4, 0, 1)
+             ? Import(picture, rgba, rgba_stride, 4, 1, 1)
              : 0;
 }
 
-int WebPPictureImportBGRA(WebPPicture* picture,
+
+int WebPPictureImportBGRX(WebPPicture* picture,
                           const uint8_t* rgba, int rgba_stride) {
   return (picture != NULL && rgba != NULL)
-             ? Import(picture, rgba, rgba_stride, 4, 1, 1)
+             ? Import(picture, rgba, rgba_stride, 4, 1, 0)
              : 0;
 }
 
-int WebPPictureImportRGBX(WebPPicture* picture,
+#endif   // WEBP_REDUCE_CSP
+
+int WebPPictureImportRGB(WebPPicture* picture,
+                         const uint8_t* rgb, int rgb_stride) {
+  return (picture != NULL && rgb != NULL)
+             ? Import(picture, rgb, rgb_stride, 3, 0, 0)
+             : 0;
+}
+
+int WebPPictureImportRGBA(WebPPicture* picture,
                           const uint8_t* rgba, int rgba_stride) {
   return (picture != NULL && rgba != NULL)
-             ? Import(picture, rgba, rgba_stride, 4, 0, 0)
+             ? Import(picture, rgba, rgba_stride, 4, 0, 1)
              : 0;
 }
 
-int WebPPictureImportBGRX(WebPPicture* picture,
+int WebPPictureImportRGBX(WebPPicture* picture,
                           const uint8_t* rgba, int rgba_stride) {
   return (picture != NULL && rgba != NULL)
-             ? Import(picture, rgba, rgba_stride, 4, 1, 0)
+             ? Import(picture, rgba, rgba_stride, 4, 0, 0)
              : 0;
 }
 
diff --git a/thirdparty/libwebp/enc/picture_enc.c b/thirdparty/libwebp/src/enc/picture_enc.c
index dfa66510fb..c691622d03 100644
--- a/thirdparty/libwebp/enc/picture_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_enc.c
@@ -14,9 +14,9 @@
 #include <assert.h>
 #include <stdlib.h>
 
-#include "./vp8i_enc.h"
-#include "../dsp/dsp.h"
-#include "../utils/utils.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // WebPPicture
@@ -76,13 +76,12 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
     return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
   }
   // allocate a new buffer.
-  memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb));
+  memory = WebPSafeMalloc(argb_size + WEBP_ALIGN_CST, sizeof(*picture->argb));
   if (memory == NULL) {
     return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
   }
-  // TODO(skal): align plane to cache line?
   picture->memory_argb_ = memory;
-  picture->argb = (uint32_t*)memory;
+  picture->argb = (uint32_t*)WEBP_ALIGN(memory);
   picture->argb_stride = width;
   return 1;
 }
@@ -92,8 +91,8 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
       (WebPEncCSP)((int)picture->colorspace & WEBP_CSP_UV_MASK);
   const int has_alpha = (int)picture->colorspace & WEBP_CSP_ALPHA_BIT;
   const int y_stride = width;
-  const int uv_width = (width + 1) >> 1;
-  const int uv_height = (height + 1) >> 1;
+  const int uv_width = (int)(((int64_t)width + 1) >> 1);
+  const int uv_height = (int)(((int64_t)height + 1) >> 1);
   const int uv_stride = uv_width;
   int a_width, a_stride;
   uint64_t y_size, uv_size, a_size, total_size;
@@ -118,8 +117,8 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
   total_size = y_size + a_size + 2 * uv_size;
 
   // Security and validation checks
-  if (width <= 0 || height <= 0 ||         // luma/alpha param error
-      uv_width < 0 || uv_height < 0) {     // u/v param error
+  if (width <= 0 || height <= 0 ||           // luma/alpha param error
+      uv_width <= 0 || uv_height <= 0) {     // u/v param error
     return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
   }
   // allocate a new buffer.
@@ -271,9 +270,11 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, float q,          \
 }
 
 ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB)
-ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR)
 ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA)
+#if !defined(WEBP_REDUCE_CSP)
+ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR)
 ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA)
+#endif  // WEBP_REDUCE_CSP
 
 #undef ENCODE_FUNC
 
@@ -284,9 +285,11 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) {       \
 }
 
 LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB)
-LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR)
 LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA)
+#if !defined(WEBP_REDUCE_CSP)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR)
 LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA)
+#endif  // WEBP_REDUCE_CSP
 
 #undef LOSSLESS_ENCODE_FUNC
 
diff --git a/thirdparty/libwebp/enc/picture_psnr_enc.c b/thirdparty/libwebp/src/enc/picture_psnr_enc.c
index 9c0b229507..362a7c79be 100644
--- a/thirdparty/libwebp/enc/picture_psnr_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_psnr_enc.c
@@ -11,11 +11,15 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
+#include "src/webp/encode.h"
+
+#if !(defined(WEBP_DISABLE_STATS) || defined(WEBP_REDUCE_SIZE))
+
 #include <math.h>
 #include <stdlib.h>
 
-#include "./vp8i_enc.h"
-#include "../utils/utils.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/utils/utils.h"
 
 typedef double (*AccumulateFunc)(const uint8_t* src, int src_stride,
                                  const uint8_t* ref, int ref_stride,
@@ -210,4 +214,34 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
   return ok;
 }
 
-//------------------------------------------------------------------------------
+#else  // defined(WEBP_DISABLE_STATS) || defined(WEBP_REDUCE_SIZE)
+int WebPPlaneDistortion(const uint8_t* src, size_t src_stride,
+                        const uint8_t* ref, size_t ref_stride,
+                        int width, int height, size_t x_step,
+                        int type, float* distortion, float* result) {
+  (void)src;
+  (void)src_stride;
+  (void)ref;
+  (void)ref_stride;
+  (void)width;
+  (void)height;
+  (void)x_step;
+  (void)type;
+  if (distortion == NULL || result == NULL) return 0;
+  *distortion = 0.f;
+  *result = 0.f;
+  return 1;
+}
+
+int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
+                          int type, float results[5]) {
+  int i;
+  (void)src;
+  (void)ref;
+  (void)type;
+  if (results == NULL) return 0;
+  for (i = 0; i < 5; ++i) results[i] = 0.f;
+  return 1;
+}
+
+#endif  // !(defined(WEBP_DISABLE_STATS) || defined(WEBP_REDUCE_SIZE))
diff --git a/thirdparty/libwebp/enc/picture_rescale_enc.c b/thirdparty/libwebp/src/enc/picture_rescale_enc.c
index 0b7181c0d7..58a6ae7b9d 100644
--- a/thirdparty/libwebp/enc/picture_rescale_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_rescale_enc.c
@@ -11,12 +11,16 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
+#include "src/webp/encode.h"
+
+#if !defined(WEBP_REDUCE_SIZE)
+
 #include <assert.h>
 #include <stdlib.h>
 
-#include "./vp8i_enc.h"
-#include "../utils/rescaler_utils.h"
-#include "../utils/utils.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/utils/rescaler_utils.h"
+#include "src/utils/utils.h"
 
 #define HALVE(x) (((x) + 1) >> 1)
 
@@ -261,4 +265,45 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
   return 1;
 }
 
-//------------------------------------------------------------------------------
+#else  // defined(WEBP_REDUCE_SIZE)
+
+int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
+  (void)src;
+  (void)dst;
+  return 0;
+}
+
+int WebPPictureIsView(const WebPPicture* picture) {
+  (void)picture;
+  return 0;
+}
+
+int WebPPictureView(const WebPPicture* src,
+                    int left, int top, int width, int height,
+                    WebPPicture* dst) {
+  (void)src;
+  (void)left;
+  (void)top;
+  (void)width;
+  (void)height;
+  (void)dst;
+  return 0;
+}
+
+int WebPPictureCrop(WebPPicture* pic,
+                    int left, int top, int width, int height) {
+  (void)pic;
+  (void)left;
+  (void)top;
+  (void)width;
+  (void)height;
+  return 0;
+}
+
+int WebPPictureRescale(WebPPicture* pic, int width, int height) {
+  (void)pic;
+  (void)width;
+  (void)height;
+  return 0;
+}
+#endif  // !defined(WEBP_REDUCE_SIZE)
diff --git a/thirdparty/libwebp/enc/picture_tools_enc.c b/thirdparty/libwebp/src/enc/picture_tools_enc.c
index 895df51156..be292d4391 100644
--- a/thirdparty/libwebp/enc/picture_tools_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_tools_enc.c
@@ -13,8 +13,8 @@
 
 #include <assert.h>
 
-#include "./vp8i_enc.h"
-#include "../dsp/yuv.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/dsp/yuv.h"
 
 static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
   return (0xff000000u | (r << 16) | (g << 8) | b);
@@ -25,20 +25,7 @@ static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
 
 #define SIZE 8
 #define SIZE2 (SIZE / 2)
-static int is_transparent_area(const uint8_t* ptr, int stride, int size) {
-  int y, x;
-  for (y = 0; y < size; ++y) {
-    for (x = 0; x < size; ++x) {
-      if (ptr[x]) {
-        return 0;
-      }
-    }
-    ptr += stride;
-  }
-  return 1;
-}
-
-static int is_transparent_argb_area(const uint32_t* ptr, int stride, int size) {
+static int IsTransparentARGBArea(const uint32_t* ptr, int stride, int size) {
   int y, x;
   for (y = 0; y < size; ++y) {
     for (x = 0; x < size; ++x) {
@@ -51,7 +38,7 @@ static int is_transparent_argb_area(const uint32_t* ptr, int stride, int size) {
   return 1;
 }
 
-static void flatten(uint8_t* ptr, int v, int stride, int size) {
+static void Flatten(uint8_t* ptr, int v, int stride, int size) {
   int y;
   for (y = 0; y < size; ++y) {
     memset(ptr, v, size);
@@ -59,7 +46,7 @@ static void flatten(uint8_t* ptr, int v, int stride, int size) {
   }
 }
 
-static void flatten_argb(uint32_t* ptr, uint32_t v, int stride, int size) {
+static void FlattenARGB(uint32_t* ptr, uint32_t v, int stride, int size) {
   int x, y;
   for (y = 0; y < size; ++y) {
     for (x = 0; x < size; ++x) ptr[x] = v;
@@ -67,54 +54,114 @@ static void flatten_argb(uint32_t* ptr, uint32_t v, int stride, int size) {
   }
 }
 
+// Smoothen the luma components of transparent pixels. Return true if the whole
+// block is transparent.
+static int SmoothenBlock(const uint8_t* a_ptr, int a_stride, uint8_t* y_ptr,
+                         int y_stride, int width, int height) {
+  int sum = 0, count = 0;
+  int x, y;
+  const uint8_t* alpha_ptr = a_ptr;
+  uint8_t* luma_ptr = y_ptr;
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      if (alpha_ptr[x] != 0) {
+        ++count;
+        sum += luma_ptr[x];
+      }
+    }
+    alpha_ptr += a_stride;
+    luma_ptr += y_stride;
+  }
+  if (count > 0 && count < width * height) {
+    const uint8_t avg_u8 = (uint8_t)(sum / count);
+    alpha_ptr = a_ptr;
+    luma_ptr = y_ptr;
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        if (alpha_ptr[x] == 0) luma_ptr[x] = avg_u8;
+      }
+      alpha_ptr += a_stride;
+      luma_ptr += y_stride;
+    }
+  }
+  return (count == 0);
+}
+
 void WebPCleanupTransparentArea(WebPPicture* pic) {
   int x, y, w, h;
   if (pic == NULL) return;
   w = pic->width / SIZE;
   h = pic->height / SIZE;
 
-  // note: we ignore the left-overs on right/bottom
+  // note: we ignore the left-overs on right/bottom, except for SmoothenBlock().
   if (pic->use_argb) {
     uint32_t argb_value = 0;
     for (y = 0; y < h; ++y) {
       int need_reset = 1;
       for (x = 0; x < w; ++x) {
         const int off = (y * pic->argb_stride + x) * SIZE;
-        if (is_transparent_argb_area(pic->argb + off, pic->argb_stride, SIZE)) {
+        if (IsTransparentARGBArea(pic->argb + off, pic->argb_stride, SIZE)) {
           if (need_reset) {
             argb_value = pic->argb[off];
             need_reset = 0;
           }
-          flatten_argb(pic->argb + off, argb_value, pic->argb_stride, SIZE);
+          FlattenARGB(pic->argb + off, argb_value, pic->argb_stride, SIZE);
         } else {
           need_reset = 1;
         }
       }
     }
   } else {
-    const uint8_t* const a_ptr = pic->a;
+    const int width = pic->width;
+    const int height = pic->height;
+    const int y_stride = pic->y_stride;
+    const int uv_stride = pic->uv_stride;
+    const int a_stride = pic->a_stride;
+    uint8_t* y_ptr = pic->y;
+    uint8_t* u_ptr = pic->u;
+    uint8_t* v_ptr = pic->v;
+    const uint8_t* a_ptr = pic->a;
     int values[3] = { 0 };
-    if (a_ptr == NULL) return;    // nothing to do
-    for (y = 0; y < h; ++y) {
+    if (a_ptr == NULL || y_ptr == NULL || u_ptr == NULL || v_ptr == NULL) {
+      return;
+    }
+    for (y = 0; y + SIZE <= height; y += SIZE) {
       int need_reset = 1;
-      for (x = 0; x < w; ++x) {
-        const int off_a = (y * pic->a_stride + x) * SIZE;
-        const int off_y = (y * pic->y_stride + x) * SIZE;
-        const int off_uv = (y * pic->uv_stride + x) * SIZE2;
-        if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) {
+      for (x = 0; x + SIZE <= width; x += SIZE) {
+        if (SmoothenBlock(a_ptr + x, a_stride, y_ptr + x, y_stride,
+                          SIZE, SIZE)) {
           if (need_reset) {
-            values[0] = pic->y[off_y];
-            values[1] = pic->u[off_uv];
-            values[2] = pic->v[off_uv];
+            values[0] = y_ptr[x];
+            values[1] = u_ptr[x >> 1];
+            values[2] = v_ptr[x >> 1];
             need_reset = 0;
           }
-          flatten(pic->y + off_y, values[0], pic->y_stride, SIZE);
-          flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2);
-          flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2);
+          Flatten(y_ptr + x,        values[0], y_stride,  SIZE);
+          Flatten(u_ptr + (x >> 1), values[1], uv_stride, SIZE2);
+          Flatten(v_ptr + (x >> 1), values[2], uv_stride, SIZE2);
         } else {
           need_reset = 1;
         }
       }
+      if (x < width) {
+        SmoothenBlock(a_ptr + x, a_stride, y_ptr + x, y_stride,
+                      width - x, SIZE);
+      }
+      a_ptr += SIZE * a_stride;
+      y_ptr += SIZE * y_stride;
+      u_ptr += SIZE2 * uv_stride;
+      v_ptr += SIZE2 * uv_stride;
+    }
+    if (y < height) {
+      const int sub_height = height - y;
+      for (x = 0; x + SIZE <= width; x += SIZE) {
+        SmoothenBlock(a_ptr + x, a_stride, y_ptr + x, y_stride,
+                      SIZE, sub_height);
+      }
+      if (x < width) {
+        SmoothenBlock(a_ptr + x, a_stride, y_ptr + x, y_stride,
+                      width - x, sub_height);
+      }
     }
   }
 }
@@ -144,9 +191,9 @@ void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) {
 // Blend color and remove transparency info
 
 #define BLEND(V0, V1, ALPHA) \
-    ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 16)
+    ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101 + 256) >> 16)
 #define BLEND_10BIT(V0, V1, ALPHA) \
-    ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 18)
+    ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101 + 1024) >> 18)
 
 void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
   const int red = (background_rgb >> 16) & 0xff;
diff --git a/thirdparty/libwebp/enc/predictor_enc.c b/thirdparty/libwebp/src/enc/predictor_enc.c
index 0639b74f1c..f3715f515e 100644
--- a/thirdparty/libwebp/enc/predictor_enc.c
+++ b/thirdparty/libwebp/src/enc/predictor_enc.c
@@ -14,9 +14,9 @@
 //          Urvang Joshi (urvang@google.com)
 //          Vincent Rabaud (vrabaud@google.com)
 
-#include "../dsp/lossless.h"
-#include "../dsp/lossless_common.h"
-#include "./vp8li_enc.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
+#include "src/enc/vp8li_enc.h"
 
 #define MAX_DIFF_COST (1e30f)
 
@@ -26,7 +26,6 @@ static const uint32_t kMaskAlpha = 0xff000000;
 
 // Mostly used to reduce code size + readability
 static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
-static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; }
 
 //------------------------------------------------------------------------------
 // Methods to calculate Entropy (Shannon).
@@ -90,6 +89,9 @@ static WEBP_INLINE void PredictBatch(int mode, int x_start, int y,
   }
 }
 
+#if (WEBP_NEAR_LOSSLESS == 1)
+static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; }
+
 static int MaxDiffBetweenPixels(uint32_t p1, uint32_t p2) {
   const int diff_a = abs((int)(p1 >> 24) - (int)(p2 >> 24));
   const int diff_r = abs((int)((p1 >> 16) & 0xff) - (int)((p2 >> 16) & 0xff));
@@ -180,6 +182,7 @@ static uint8_t NearLosslessComponent(uint8_t value, uint8_t predict,
 // max_quantization which is a power of 2, smaller than max_diff). Take care if
 // value and predict have undergone subtract green, which means that red and
 // blue are represented as offsets from green.
+#define NEAR_LOSSLESS_DIFF(a, b) (uint8_t)((((int)(a) - (int)(b))) & 0xff)
 static uint32_t NearLossless(uint32_t value, uint32_t predict,
                              int max_quantization, int max_diff,
                              int used_subtract_green) {
@@ -196,7 +199,7 @@ static uint32_t NearLossless(uint32_t value, uint32_t predict,
   }
   if ((value >> 24) == 0 || (value >> 24) == 0xff) {
     // Preserve transparency of fully transparent or fully opaque pixels.
-    a = ((value >> 24) - (predict >> 24)) & 0xff;
+    a = NEAR_LOSSLESS_DIFF(value >> 24, predict >> 24);
   } else {
     a = NearLosslessComponent(value >> 24, predict >> 24, 0xff, quantization);
   }
@@ -209,15 +212,17 @@ static uint32_t NearLossless(uint32_t value, uint32_t predict,
     // The amount by which green has been adjusted during quantization. It is
     // subtracted from red and blue for compensation, to avoid accumulating two
     // quantization errors in them.
-    green_diff = (new_green - (value >> 8)) & 0xff;
+    green_diff = NEAR_LOSSLESS_DIFF(new_green, value >> 8);
   }
-  r = NearLosslessComponent(((value >> 16) - green_diff) & 0xff,
+  r = NearLosslessComponent(NEAR_LOSSLESS_DIFF(value >> 16, green_diff),
                             (predict >> 16) & 0xff, 0xff - new_green,
                             quantization);
-  b = NearLosslessComponent((value - green_diff) & 0xff, predict & 0xff,
-                            0xff - new_green, quantization);
+  b = NearLosslessComponent(NEAR_LOSSLESS_DIFF(value, green_diff),
+                            predict & 0xff, 0xff - new_green, quantization);
   return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
 }
+#undef NEAR_LOSSLESS_DIFF
+#endif  // (WEBP_NEAR_LOSSLESS == 1)
 
 // Stores the difference between the pixel and its prediction in "out".
 // In case of a lossy encoding, updates the source image to avoid propagating
@@ -244,6 +249,7 @@ static WEBP_INLINE void GetResidual(
       } else {
         predict = pred_func(current_row[x - 1], upper_row + x);
       }
+#if (WEBP_NEAR_LOSSLESS == 1)
       if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 ||
           x == 0 || x == width - 1) {
         residual = VP8LSubPixels(current_row[x], predict);
@@ -254,6 +260,13 @@ static WEBP_INLINE void GetResidual(
         current_row[x] = VP8LAddPixels(predict, residual);
         // x is never 0 here so we do not need to update upper_row like below.
       }
+#else
+      (void)max_diffs;
+      (void)height;
+      (void)max_quantization;
+      (void)used_subtract_green;
+      residual = VP8LSubPixels(current_row[x], predict);
+#endif
       if ((current_row[x] & kMaskAlpha) == 0) {
         // If alpha is 0, cleanup RGB. We can choose the RGB values of the
         // residual for best compression. The prediction of alpha itself can be
@@ -296,11 +309,12 @@ static int GetBestPredictorForTile(int width, int height,
   const int max_x = GetMin(tile_size, width - start_x);
   // Whether there exist columns just outside the tile.
   const int have_left = (start_x > 0);
-  const int have_right = (max_x < width - start_x);
   // Position and size of the strip covering the tile and adjacent columns if
   // they exist.
   const int context_start_x = start_x - have_left;
-  const int context_width = max_x + have_left + have_right;
+#if (WEBP_NEAR_LOSSLESS == 1)
+  const int context_width = max_x + have_left + (max_x < width - start_x);
+#endif
   const int tiles_per_row = VP8LSubSampleSize(width, bits);
   // Prediction modes of the left and above neighbor tiles.
   const int left_mode = (tile_x > 0) ?
@@ -352,10 +366,12 @@ static int GetBestPredictorForTile(int width, int height,
       memcpy(current_row + context_start_x,
              argb + y * width + context_start_x,
              sizeof(*argb) * (max_x + have_left + (y + 1 < height)));
+#if (WEBP_NEAR_LOSSLESS == 1)
       if (max_quantization > 1 && y >= 1 && y + 1 < height) {
         MaxDiffsForRow(context_width, width, argb + y * width + context_start_x,
                        max_diffs + context_start_x, used_subtract_green);
       }
+#endif
 
       GetResidual(width, height, upper_row, current_row, max_diffs, mode,
                   start_x, start_x + max_x, y, max_quantization, exact,
@@ -405,7 +421,9 @@ static void CopyImageWithPrediction(int width, int height,
   uint32_t* upper_row = argb_scratch;
   uint32_t* current_row = upper_row + width + 1;
   uint8_t* current_max_diffs = (uint8_t*)(current_row + width + 1);
+#if (WEBP_NEAR_LOSSLESS == 1)
   uint8_t* lower_max_diffs = current_max_diffs + width;
+#endif
   int y;
 
   for (y = 0; y < height; ++y) {
@@ -420,6 +438,7 @@ static void CopyImageWithPrediction(int width, int height,
       PredictBatch(kPredLowEffort, 0, y, width, current_row, upper_row,
                    argb + y * width);
     } else {
+#if (WEBP_NEAR_LOSSLESS == 1)
       if (max_quantization > 1) {
         // Compute max_diffs for the lower row now, because that needs the
         // contents of argb for the current row, which we will overwrite with
@@ -432,6 +451,7 @@ static void CopyImageWithPrediction(int width, int height,
                          used_subtract_green);
         }
       }
+#endif
       for (x = 0; x < width;) {
         const int mode =
             (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
diff --git a/thirdparty/libwebp/enc/quant_enc.c b/thirdparty/libwebp/src/enc/quant_enc.c
index b118fb2a13..3b1a3129b5 100644
--- a/thirdparty/libwebp/enc/quant_enc.c
+++ b/thirdparty/libwebp/src/enc/quant_enc.c
@@ -15,8 +15,8 @@
 #include <math.h>
 #include <stdlib.h>  // for abs()
 
-#include "./vp8i_enc.h"
-#include "./cost_enc.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/enc/cost_enc.h"
 
 #define DO_TRELLIS_I4  1
 #define DO_TRELLIS_I16 1   // not a huge gain, but ok at low bitrate.
@@ -457,11 +457,11 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
 // Form the predictions in cache
 
 // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
-const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
-const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
+const uint16_t VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
+const uint16_t VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
 
 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
-const int VP8I4ModeOffsets[NUM_BMODES] = {
+const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
   I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
 };
 
@@ -492,14 +492,14 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
 // |YYYY|....| 12
 // +----+----+
 
-const int VP8Scan[16] = {  // Luma
+const uint16_t VP8Scan[16] = {  // Luma
   0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
   0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
   0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
   0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
 };
 
-static const int VP8ScanUV[4 + 4] = {
+static const uint16_t VP8ScanUV[4 + 4] = {
   0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
   8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
 };
@@ -1162,7 +1162,7 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
     const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
     for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
       const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
-      const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT
+      const score_t score = (score_t)VP8SSE16x16(src, ref) * RD_DISTO_MULT
                           + VP8FixedCostsI16[mode] * lambda_d_i16;
       if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
         continue;
diff --git a/thirdparty/libwebp/enc/syntax_enc.c b/thirdparty/libwebp/src/enc/syntax_enc.c
index 90665bd7e5..a9e5a6cf0f 100644
--- a/thirdparty/libwebp/enc/syntax_enc.c
+++ b/thirdparty/libwebp/src/enc/syntax_enc.c
@@ -13,10 +13,10 @@
 
 #include <assert.h>
 
-#include "../utils/utils.h"
-#include "../webp/format_constants.h"  // RIFF constants
-#include "../webp/mux_types.h"         // ALPHA_FLAG
-#include "./vp8i_enc.h"
+#include "src/utils/utils.h"
+#include "src/webp/format_constants.h"  // RIFF constants
+#include "src/webp/mux_types.h"         // ALPHA_FLAG
+#include "src/enc/vp8i_enc.h"
 
 //------------------------------------------------------------------------------
 // Helper functions
@@ -289,11 +289,17 @@ static int GeneratePartition0(VP8Encoder* const enc) {
 
   pos3 = VP8BitWriterPos(bw);
 
+#if !defined(WEBP_DISABLE_STATS)
   if (enc->pic_->stats) {
     enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3);
     enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
     enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_;
   }
+#else
+  (void)pos1;
+  (void)pos2;
+  (void)pos3;
+#endif
   if (bw->error_) {
     return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
   }
diff --git a/thirdparty/libwebp/enc/token_enc.c b/thirdparty/libwebp/src/enc/token_enc.c
index 02a0d72cc6..3a2192acac 100644
--- a/thirdparty/libwebp/enc/token_enc.c
+++ b/thirdparty/libwebp/src/enc/token_enc.c
@@ -20,9 +20,9 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "./cost_enc.h"
-#include "./vp8i_enc.h"
-#include "../utils/utils.h"
+#include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/utils/utils.h"
 
 #if !defined(DISABLE_TOKEN_BUFFER)
 
@@ -195,39 +195,6 @@ int VP8RecordCoeffTokens(int ctx, const struct VP8Residual* const res,
 #undef TOKEN_ID
 
 //------------------------------------------------------------------------------
-// This function works, but isn't currently used. Saved for later.
-
-#if 0
-
-static void Record(int bit, proba_t* const stats) {
-  proba_t p = *stats;
-  if (p >= 0xffff0000u) {               // an overflow is inbound.
-    p = ((p + 1u) >> 1) & 0x7fff7fffu;  // -> divide the stats by 2.
-  }
-  // record bit count (lower 16 bits) and increment total count (upper 16 bits).
-  p += 0x00010000u + bit;
-  *stats = p;
-}
-
-void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) {
-  const VP8Tokens* p = b->pages_;
-  while (p != NULL) {
-    const int N = (p->next_ == NULL) ? b->left_ : 0;
-    int n = MAX_NUM_TOKEN;
-    const token_t* const tokens = TOKEN_DATA(p);
-    while (n-- > N) {
-      const token_t token = tokens[n];
-      if (!(token & FIXED_PROBA_BIT)) {
-        Record((token >> 15) & 1, stats + (token & 0x3fffu));
-      }
-    }
-    p = p->next_;
-  }
-}
-
-#endif   // 0
-
-//------------------------------------------------------------------------------
 // Final coding pass, with known probabilities
 
 int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
@@ -283,8 +250,9 @@ size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas) {
 
 #else     // DISABLE_TOKEN_BUFFER
 
-void VP8TBufferInit(VP8TBuffer* const b) {
+void VP8TBufferInit(VP8TBuffer* const b, int page_size) {
   (void)b;
+  (void)page_size;
 }
 void VP8TBufferClear(VP8TBuffer* const b) {
   (void)b;
diff --git a/thirdparty/libwebp/enc/tree_enc.c b/thirdparty/libwebp/src/enc/tree_enc.c
index 2c40fe7f3d..64ed28360b 100644
--- a/thirdparty/libwebp/enc/tree_enc.c
+++ b/thirdparty/libwebp/src/enc/tree_enc.c
@@ -11,7 +11,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./vp8i_enc.h"
+#include "src/enc/vp8i_enc.h"
 
 //------------------------------------------------------------------------------
 // Default probabilities
diff --git a/thirdparty/libwebp/enc/vp8i_enc.h b/thirdparty/libwebp/src/enc/vp8i_enc.h
index 93c95ecbfb..3463491e9d 100644
--- a/thirdparty/libwebp/enc/vp8i_enc.h
+++ b/thirdparty/libwebp/src/enc/vp8i_enc.h
@@ -11,16 +11,16 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_ENC_VP8ENCI_H_
-#define WEBP_ENC_VP8ENCI_H_
+#ifndef WEBP_ENC_VP8I_ENC_H_
+#define WEBP_ENC_VP8I_ENC_H_
 
 #include <string.h>     // for memcpy()
-#include "../dec/common_dec.h"
-#include "../dsp/dsp.h"
-#include "../utils/bit_writer_utils.h"
-#include "../utils/thread_utils.h"
-#include "../utils/utils.h"
-#include "../webp/encode.h"
+#include "src/dec/common_dec.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/bit_writer_utils.h"
+#include "src/utils/thread_utils.h"
+#include "src/utils/utils.h"
+#include "src/webp/encode.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define ENC_MAJ_VERSION 0
 #define ENC_MIN_VERSION 6
-#define ENC_REV_VERSION 0
+#define ENC_REV_VERSION 1
 
 enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
        MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost
@@ -75,10 +75,10 @@ typedef enum {   // Rate-distortion optimization levels
 #define U_OFF_ENC    (16)
 #define V_OFF_ENC    (16 + 8)
 
-extern const int VP8Scan[16];           // in quant.c
-extern const int VP8UVModeOffsets[4];   // in analyze.c
-extern const int VP8I16ModeOffsets[4];
-extern const int VP8I4ModeOffsets[NUM_BMODES];
+extern const uint16_t VP8Scan[16];
+extern const uint16_t VP8UVModeOffsets[4];
+extern const uint16_t VP8I16ModeOffsets[4];
+extern const uint16_t VP8I4ModeOffsets[NUM_BMODES];
 
 // Layout of prediction blocks
 // intra 16x16
@@ -330,9 +330,6 @@ int VP8RecordCoeffTokens(int ctx, const struct VP8Residual* const res,
 // Estimate the final coded size given a set of 'probas'.
 size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas);
 
-// unused for now
-void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
-
 #endif  // !DISABLE_TOKEN_BUFFER
 
 //------------------------------------------------------------------------------
@@ -502,19 +499,10 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
 // compressibility (no guarantee, though). Assumes that pic->use_argb is true.
 void WebPCleanupTransparentAreaLossless(WebPPicture* const pic);
 
-  // in near_lossless.c
-// Near lossless preprocessing in RGB color-space.
-int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality);
-// Near lossless adjustment for predictors.
-void VP8ApplyNearLosslessPredict(int xsize, int ysize, int pred_bits,
-                                 const uint32_t* argb_orig,
-                                 uint32_t* argb, uint32_t* argb_scratch,
-                                 const uint32_t* const transform_data,
-                                 int quality, int subtract_green);
 //------------------------------------------------------------------------------
 
 #ifdef __cplusplus
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_ENC_VP8ENCI_H_ */
+#endif  /* WEBP_ENC_VP8I_ENC_H_ */
diff --git a/thirdparty/libwebp/enc/vp8l_enc.c b/thirdparty/libwebp/src/enc/vp8l_enc.c
index b1a793d956..312e521906 100644
--- a/thirdparty/libwebp/enc/vp8l_enc.c
+++ b/thirdparty/libwebp/src/enc/vp8l_enc.c
@@ -15,20 +15,19 @@
 #include <assert.h>
 #include <stdlib.h>
 
-#include "./backward_references_enc.h"
-#include "./histogram_enc.h"
-#include "./vp8i_enc.h"
-#include "./vp8li_enc.h"
-#include "../dsp/lossless.h"
-#include "../dsp/lossless_common.h"
-#include "../utils/bit_writer_utils.h"
-#include "../utils/huffman_encode_utils.h"
-#include "../utils/utils.h"
-#include "../webp/format_constants.h"
-
-#include "./delta_palettization_enc.h"
-
-#define PALETTE_KEY_RIGHT_SHIFT   22  // Key for 1K buffer.
+#include "src/enc/backward_references_enc.h"
+#include "src/enc/histogram_enc.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/enc/vp8li_enc.h"
+#include "src/dsp/lossless.h"
+#include "src/dsp/lossless_common.h"
+#include "src/utils/bit_writer_utils.h"
+#include "src/utils/huffman_encode_utils.h"
+#include "src/utils/utils.h"
+#include "src/webp/format_constants.h"
+
+#include "src/enc/delta_palettization_enc.h"
+
 // Maximum number of histogram images (sub-blocks).
 #define MAX_HUFF_IMAGE_SIZE       2600
 
@@ -128,7 +127,10 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
                                    uint32_t palette[MAX_PALETTE_SIZE],
                                    int* const palette_size) {
   const int num_colors = WebPGetColorPalette(pic, palette);
-  if (num_colors > MAX_PALETTE_SIZE) return 0;
+  if (num_colors > MAX_PALETTE_SIZE) {
+    *palette_size = 0;
+    return 0;
+  }
   *palette_size = num_colors;
   qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort);
   if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) {
@@ -188,22 +190,33 @@ static WEBP_INLINE uint32_t HashPix(uint32_t pix) {
 static int AnalyzeEntropy(const uint32_t* argb,
                           int width, int height, int argb_stride,
                           int use_palette,
+                          int palette_size, int transform_bits,
                           EntropyIx* const min_entropy_ix,
                           int* const red_and_blue_always_zero) {
   // Allocate histogram set with cache_bits = 0.
-  uint32_t* const histo =
-      (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256);
+  uint32_t* histo;
+
+  if (use_palette && palette_size <= 16) {
+    // In the case of small palettes, we pack 2, 4 or 8 pixels together. In
+    // practice, small palettes are better than any other transform.
+    *min_entropy_ix = kPalette;
+    *red_and_blue_always_zero = 1;
+    return 1;
+  }
+  histo = (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256);
   if (histo != NULL) {
     int i, x, y;
-    const uint32_t* prev_row = argb;
-    const uint32_t* curr_row = argb + argb_stride;
-    for (y = 1; y < height; ++y) {
-      uint32_t prev_pix = curr_row[0];
-      for (x = 1; x < width; ++x) {
+    const uint32_t* prev_row = NULL;
+    const uint32_t* curr_row = argb;
+    uint32_t pix_prev = argb[0];  // Skip the first pixel.
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
         const uint32_t pix = curr_row[x];
-        const uint32_t pix_diff = VP8LSubPixels(pix, prev_pix);
-        if ((pix_diff == 0) || (pix == prev_row[x])) continue;
-        prev_pix = pix;
+        const uint32_t pix_diff = VP8LSubPixels(pix, pix_prev);
+        pix_prev = pix;
+        if ((pix_diff == 0) || (prev_row != NULL && pix == prev_row[x])) {
+          continue;
+        }
         AddSingle(pix,
                   &histo[kHistoAlpha * 256],
                   &histo[kHistoRed * 256],
@@ -264,8 +277,24 @@ static int AnalyzeEntropy(const uint32_t* argb,
           entropy_comp[kHistoRedPredSubGreen] +
           entropy_comp[kHistoGreenPred] +
           entropy_comp[kHistoBluePredSubGreen];
-      // Palette mode seems more efficient in a breakeven case. Bias with 1.0.
-      entropy[kPalette] = entropy_comp[kHistoPalette] - 1.0;
+      entropy[kPalette] = entropy_comp[kHistoPalette];
+
+      // When including transforms, there is an overhead in bits from
+      // storing them. This overhead is small but matters for small images.
+      // For spatial, there are 14 transformations.
+      entropy[kSpatial] += VP8LSubSampleSize(width, transform_bits) *
+                           VP8LSubSampleSize(height, transform_bits) *
+                           VP8LFastLog2(14);
+      // For color transforms: 24 as only 3 channels are considered in a
+      // ColorTransformElement.
+      entropy[kSpatialSubGreen] += VP8LSubSampleSize(width, transform_bits) *
+                                   VP8LSubSampleSize(height, transform_bits) *
+                                   VP8LFastLog2(24);
+      // For palettes, add the cost of storing the palette.
+      // We empirically estimate the cost of a compressed entry as 8 bits.
+      // The palette is differential-coded when compressed hence a much
+      // lower cost than sizeof(uint32_t)*8.
+      entropy[kPalette] += palette_size * 8;
 
       *min_entropy_ix = kDirect;
       for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) {
@@ -273,6 +302,7 @@ static int AnalyzeEntropy(const uint32_t* argb,
           *min_entropy_ix = (EntropyIx)k;
         }
       }
+      assert((int)*min_entropy_ix <= last_mode_to_analyze);
       *red_and_blue_always_zero = 1;
       // Let's check if the histogram of the chosen entropy mode has
       // non-zero red and blue values. If all are zero, we can later skip
@@ -325,60 +355,95 @@ static int GetTransformBits(int method, int histo_bits) {
   return res;
 }
 
-static int AnalyzeAndInit(VP8LEncoder* const enc) {
+// Set of parameters to be used in each iteration of the cruncher.
+#define CRUNCH_CONFIGS_LZ77_MAX 2
+typedef struct {
+  int entropy_idx_;
+  int lz77s_types_to_try_[CRUNCH_CONFIGS_LZ77_MAX];
+  int lz77s_types_to_try_size_;
+} CrunchConfig;
+
+#define CRUNCH_CONFIGS_MAX kNumEntropyIx
+
+static int EncoderAnalyze(VP8LEncoder* const enc,
+                          CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX],
+                          int* const crunch_configs_size,
+                          int* const red_and_blue_always_zero) {
   const WebPPicture* const pic = enc->pic_;
   const int width = pic->width;
   const int height = pic->height;
-  const int pix_cnt = width * height;
   const WebPConfig* const config = enc->config_;
   const int method = config->method;
   const int low_effort = (config->method == 0);
-  // we round the block size up, so we're guaranteed to have
-  // at max MAX_REFS_BLOCK_PER_IMAGE blocks used:
-  int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1;
+  int i;
+  int use_palette;
+  int n_lz77s;
   assert(pic != NULL && pic->argb != NULL);
 
-  enc->use_cross_color_ = 0;
-  enc->use_predict_ = 0;
-  enc->use_subtract_green_ = 0;
-  enc->use_palette_ =
+  use_palette =
       AnalyzeAndCreatePalette(pic, low_effort,
                               enc->palette_, &enc->palette_size_);
 
   // TODO(jyrki): replace the decision to be based on an actual estimate
   // of entropy, or even spatial variance of entropy.
-  enc->histo_bits_ = GetHistoBits(method, enc->use_palette_,
+  enc->histo_bits_ = GetHistoBits(method, use_palette,
                                   pic->width, pic->height);
   enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_);
 
   if (low_effort) {
     // AnalyzeEntropy is somewhat slow.
-    enc->use_predict_ = !enc->use_palette_;
-    enc->use_subtract_green_ = !enc->use_palette_;
-    enc->use_cross_color_ = 0;
+    crunch_configs[0].entropy_idx_ = use_palette ? kPalette : kSpatialSubGreen;
+    n_lz77s = 1;
+    *crunch_configs_size = 1;
   } else {
-    int red_and_blue_always_zero;
     EntropyIx min_entropy_ix;
-    if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride,
-                        enc->use_palette_, &min_entropy_ix,
-                        &red_and_blue_always_zero)) {
+    // Try out multiple LZ77 on images with few colors.
+    n_lz77s = (enc->palette_size_ > 0 && enc->palette_size_ <= 16) ? 2 : 1;
+    if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, use_palette,
+                        enc->palette_size_, enc->transform_bits_,
+                        &min_entropy_ix, red_and_blue_always_zero)) {
       return 0;
     }
-    enc->use_palette_ = (min_entropy_ix == kPalette);
-    enc->use_subtract_green_ =
-        (min_entropy_ix == kSubGreen) || (min_entropy_ix == kSpatialSubGreen);
-    enc->use_predict_ =
-        (min_entropy_ix == kSpatial) || (min_entropy_ix == kSpatialSubGreen);
-    enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_;
+    if (method == 6 && config->quality == 100) {
+      // Go brute force on all transforms.
+      *crunch_configs_size = 0;
+      for (i = 0; i < kNumEntropyIx; ++i) {
+        if (i != kPalette || use_palette) {
+          assert(*crunch_configs_size < CRUNCH_CONFIGS_MAX);
+          crunch_configs[(*crunch_configs_size)++].entropy_idx_ = i;
+        }
+      }
+    } else {
+      // Only choose the guessed best transform.
+      *crunch_configs_size = 1;
+      crunch_configs[0].entropy_idx_ = min_entropy_ix;
+    }
+  }
+  // Fill in the different LZ77s.
+  assert(n_lz77s <= CRUNCH_CONFIGS_LZ77_MAX);
+  for (i = 0; i < *crunch_configs_size; ++i) {
+    int j;
+    for (j = 0; j < n_lz77s; ++j) {
+      crunch_configs[i].lz77s_types_to_try_[j] =
+          (j == 0) ? kLZ77Standard | kLZ77RLE : kLZ77Box;
+    }
+    crunch_configs[i].lz77s_types_to_try_size_ = n_lz77s;
   }
+  return 1;
+}
 
+static int EncoderInit(VP8LEncoder* const enc) {
+  const WebPPicture* const pic = enc->pic_;
+  const int width = pic->width;
+  const int height = pic->height;
+  const int pix_cnt = width * height;
+  // we round the block size up, so we're guaranteed to have
+  // at most MAX_REFS_BLOCK_PER_IMAGE blocks used:
+  const int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1;
+  int i;
   if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0;
 
-  // palette-friendly input typically uses less literals
-  //  -> reduce block size a bit
-  if (enc->use_palette_) refs_block_size /= 2;
-  VP8LBackwardRefsInit(&enc->refs_[0], refs_block_size);
-  VP8LBackwardRefsInit(&enc->refs_[1], refs_block_size);
+  for (i = 0; i < 3; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size);
 
   return 1;
 }
@@ -571,11 +636,16 @@ static void StoreFullHuffmanCode(VP8LBitWriter* const bw,
     length = write_trimmed_length ? trimmed_length : num_tokens;
     VP8LPutBits(bw, write_trimmed_length, 1);
     if (write_trimmed_length) {
-      const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1);
-      const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
-      VP8LPutBits(bw, nbitpairs - 1, 3);
-      assert(trimmed_length >= 2);
-      VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2);
+      if (trimmed_length == 2) {
+        VP8LPutBits(bw, 0, 3 + 2);     // nbitpairs=1, trimmed_length=2
+      } else {
+        const int nbits = BitsLog2Floor(trimmed_length - 2);
+        const int nbitpairs = nbits / 2 + 1;
+        assert(trimmed_length > 2);
+        assert(nbitpairs - 1 < 8);
+        VP8LPutBits(bw, nbitpairs - 1, 3);
+        VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2);
+      }
     }
     StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code);
   }
@@ -642,7 +712,7 @@ static WEBP_INLINE void WriteHuffmanCodeWithExtraBits(
 
 static WebPEncodingError StoreImageToBitMask(
     VP8LBitWriter* const bw, int width, int histo_bits,
-    VP8LBackwardRefs* const refs,
+    const VP8LBackwardRefs* const refs,
     const uint16_t* histogram_symbols,
     const HuffmanTreeCode* const huffman_codes) {
   const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
@@ -665,7 +735,7 @@ static WebPEncodingError StoreImageToBitMask(
       codes = huffman_codes + 5 * histogram_ix;
     }
     if (PixOrCopyIsLiteral(v)) {
-      static const int order[] = { 1, 2, 0, 3 };
+      static const uint8_t order[] = { 1, 2, 0, 3 };
       int k;
       for (k = 0; k < 4; ++k) {
         const int code = PixOrCopyLiteral(v, order[k]);
@@ -705,7 +775,8 @@ static WebPEncodingError StoreImageToBitMask(
 static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
                                               const uint32_t* const argb,
                                               VP8LHashChain* const hash_chain,
-                                              VP8LBackwardRefs refs_array[2],
+                                              VP8LBackwardRefs* const refs_tmp1,
+                                              VP8LBackwardRefs* const refs_tmp2,
                                               int width, int height,
                                               int quality, int low_effort) {
   int i;
@@ -730,8 +801,9 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
-  refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits,
-                                   hash_chain, refs_array);
+  refs = VP8LGetBackwardReferences(width, height, argb, quality, 0,
+                                   kLZ77Standard | kLZ77RLE, &cache_bits,
+                                   hash_chain, refs_tmp1, refs_tmp2);
   if (refs == NULL) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
@@ -788,39 +860,37 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
   return err;
 }
 
-static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
-                                             const uint32_t* const argb,
-                                             VP8LHashChain* const hash_chain,
-                                             VP8LBackwardRefs refs_array[2],
-                                             int width, int height, int quality,
-                                             int low_effort,
-                                             int use_cache, int* cache_bits,
-                                             int histogram_bits,
-                                             size_t init_byte_position,
-                                             int* const hdr_size,
-                                             int* const data_size) {
+static WebPEncodingError EncodeImageInternal(
+    VP8LBitWriter* const bw, const uint32_t* const argb,
+    VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[3], int width,
+    int height, int quality, int low_effort, int use_cache,
+    const CrunchConfig* const config, int* cache_bits, int histogram_bits,
+    size_t init_byte_position, int* const hdr_size, int* const data_size) {
   WebPEncodingError err = VP8_ENC_OK;
   const uint32_t histogram_image_xysize =
       VP8LSubSampleSize(width, histogram_bits) *
       VP8LSubSampleSize(height, histogram_bits);
   VP8LHistogramSet* histogram_image = NULL;
-  VP8LHistogramSet* tmp_histos = NULL;
+  VP8LHistogram* tmp_histo = NULL;
   int histogram_image_size = 0;
   size_t bit_array_size = 0;
-  HuffmanTree* huff_tree = NULL;
+  HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(
+      3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
   HuffmanTreeToken* tokens = NULL;
   HuffmanTreeCode* huffman_codes = NULL;
-  VP8LBackwardRefs refs;
-  VP8LBackwardRefs* best_refs;
+  VP8LBackwardRefs* refs_best;
+  VP8LBackwardRefs* refs_tmp;
   uint16_t* const histogram_symbols =
       (uint16_t*)WebPSafeMalloc(histogram_image_xysize,
                                 sizeof(*histogram_symbols));
+  int lz77s_idx;
+  VP8LBitWriter bw_init = *bw, bw_best;
+  int hdr_size_tmp;
   assert(histogram_bits >= MIN_HUFFMAN_BITS);
   assert(histogram_bits <= MAX_HUFFMAN_BITS);
   assert(hdr_size != NULL);
   assert(data_size != NULL);
 
-  VP8LBackwardRefsInit(&refs, refs_array[0].block_size_);
   if (histogram_symbols == NULL) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
@@ -836,142 +906,162 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
   // 'best_refs' is the reference to the best backward refs and points to one
   // of refs_array[0] or refs_array[1].
   // Calculate backward references from ARGB image.
-  if (!VP8LHashChainFill(hash_chain, quality, argb, width, height,
-                         low_effort)) {
-    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-    goto Error;
-  }
-  best_refs = VP8LGetBackwardReferences(width, height, argb, quality,
-                                        low_effort, cache_bits, hash_chain,
-                                        refs_array);
-  if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) {
-    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-    goto Error;
-  }
-  histogram_image =
-      VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits);
-  tmp_histos = VP8LAllocateHistogramSet(2, *cache_bits);
-  if (histogram_image == NULL || tmp_histos == NULL) {
+  if (huff_tree == NULL ||
+      !VP8LHashChainFill(hash_chain, quality, argb, width, height,
+                         low_effort) ||
+      !VP8LBitWriterInit(&bw_best, 0) ||
+      (config->lz77s_types_to_try_size_ > 1 &&
+       !VP8LBitWriterClone(bw, &bw_best))) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
+  for (lz77s_idx = 0; lz77s_idx < config->lz77s_types_to_try_size_;
+       ++lz77s_idx) {
+    refs_best = VP8LGetBackwardReferences(
+        width, height, argb, quality, low_effort,
+        config->lz77s_types_to_try_[lz77s_idx], cache_bits, hash_chain,
+        &refs_array[0], &refs_array[1]);
+    if (refs_best == NULL) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
+    // Keep the best references aside and use the other element from the first
+    // two as a temporary for later usage.
+    refs_tmp = &refs_array[refs_best == &refs_array[0] ? 1 : 0];
+
+    histogram_image =
+        VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits);
+    tmp_histo = VP8LAllocateHistogram(*cache_bits);
+    if (histogram_image == NULL || tmp_histo == NULL) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
 
-  // Build histogram image and symbols from backward references.
-  if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort,
-                                histogram_bits, *cache_bits, histogram_image,
-                                tmp_histos, histogram_symbols)) {
-    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-    goto Error;
-  }
-  // Create Huffman bit lengths and codes for each histogram image.
-  histogram_image_size = histogram_image->size;
-  bit_array_size = 5 * histogram_image_size;
-  huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
-                                                   sizeof(*huffman_codes));
-  // Note: some histogram_image entries may point to tmp_histos[], so the latter
-  // need to outlive the following call to GetHuffBitLengthsAndCodes().
-  if (huffman_codes == NULL ||
-      !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
-    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-    goto Error;
-  }
-  // Free combined histograms.
-  VP8LFreeHistogramSet(histogram_image);
-  histogram_image = NULL;
+    // Build histogram image and symbols from backward references.
+    if (!VP8LGetHistoImageSymbols(width, height, refs_best, quality, low_effort,
+                                  histogram_bits, *cache_bits, histogram_image,
+                                  tmp_histo, histogram_symbols)) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
+    // Create Huffman bit lengths and codes for each histogram image.
+    histogram_image_size = histogram_image->size;
+    bit_array_size = 5 * histogram_image_size;
+    huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
+                                                     sizeof(*huffman_codes));
+    // Note: some histogram_image entries may point to tmp_histo, so it needs
+    // to outlive the following call to GetHuffBitLengthsAndCodes().
+    if (huffman_codes == NULL ||
+        !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
+    // Free combined histograms.
+    VP8LFreeHistogramSet(histogram_image);
+    histogram_image = NULL;
 
-  // Free scratch histograms.
-  VP8LFreeHistogramSet(tmp_histos);
-  tmp_histos = NULL;
+    // Free scratch histograms.
+    VP8LFreeHistogram(tmp_histo);
+    tmp_histo = NULL;
 
-  // Color Cache parameters.
-  if (*cache_bits > 0) {
-    VP8LPutBits(bw, 1, 1);
-    VP8LPutBits(bw, *cache_bits, 4);
-  } else {
-    VP8LPutBits(bw, 0, 1);
-  }
+    // Color Cache parameters.
+    if (*cache_bits > 0) {
+      VP8LPutBits(bw, 1, 1);
+      VP8LPutBits(bw, *cache_bits, 4);
+    } else {
+      VP8LPutBits(bw, 0, 1);
+    }
 
-  // Huffman image + meta huffman.
-  {
-    const int write_histogram_image = (histogram_image_size > 1);
-    VP8LPutBits(bw, write_histogram_image, 1);
-    if (write_histogram_image) {
-      uint32_t* const histogram_argb =
-          (uint32_t*)WebPSafeMalloc(histogram_image_xysize,
-                                    sizeof(*histogram_argb));
-      int max_index = 0;
-      uint32_t i;
-      if (histogram_argb == NULL) {
-        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-        goto Error;
-      }
-      for (i = 0; i < histogram_image_xysize; ++i) {
-        const int symbol_index = histogram_symbols[i] & 0xffff;
-        histogram_argb[i] = (symbol_index << 8);
-        if (symbol_index >= max_index) {
-          max_index = symbol_index + 1;
+    // Huffman image + meta huffman.
+    {
+      const int write_histogram_image = (histogram_image_size > 1);
+      VP8LPutBits(bw, write_histogram_image, 1);
+      if (write_histogram_image) {
+        uint32_t* const histogram_argb =
+            (uint32_t*)WebPSafeMalloc(histogram_image_xysize,
+                                      sizeof(*histogram_argb));
+        int max_index = 0;
+        uint32_t i;
+        if (histogram_argb == NULL) {
+          err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+          goto Error;
+        }
+        for (i = 0; i < histogram_image_xysize; ++i) {
+          const int symbol_index = histogram_symbols[i] & 0xffff;
+          histogram_argb[i] = (symbol_index << 8);
+          if (symbol_index >= max_index) {
+            max_index = symbol_index + 1;
+          }
         }
+        histogram_image_size = max_index;
+
+        VP8LPutBits(bw, histogram_bits - 2, 3);
+        err = EncodeImageNoHuffman(
+            bw, histogram_argb, hash_chain, refs_tmp, &refs_array[2],
+            VP8LSubSampleSize(width, histogram_bits),
+            VP8LSubSampleSize(height, histogram_bits), quality, low_effort);
+        WebPSafeFree(histogram_argb);
+        if (err != VP8_ENC_OK) goto Error;
       }
-      histogram_image_size = max_index;
-
-      VP8LPutBits(bw, histogram_bits - 2, 3);
-      err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array,
-                                 VP8LSubSampleSize(width, histogram_bits),
-                                 VP8LSubSampleSize(height, histogram_bits),
-                                 quality, low_effort);
-      WebPSafeFree(histogram_argb);
-      if (err != VP8_ENC_OK) goto Error;
     }
-  }
 
-  // Store Huffman codes.
-  {
-    int i;
-    int max_tokens = 0;
-    huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES,
-                                             sizeof(*huff_tree));
-    if (huff_tree == NULL) {
-      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-      goto Error;
-    }
-    // Find maximum number of symbols for the huffman tree-set.
-    for (i = 0; i < 5 * histogram_image_size; ++i) {
-      HuffmanTreeCode* const codes = &huffman_codes[i];
-      if (max_tokens < codes->num_symbols) {
-        max_tokens = codes->num_symbols;
+    // Store Huffman codes.
+    {
+      int i;
+      int max_tokens = 0;
+      // Find maximum number of symbols for the huffman tree-set.
+      for (i = 0; i < 5 * histogram_image_size; ++i) {
+        HuffmanTreeCode* const codes = &huffman_codes[i];
+        if (max_tokens < codes->num_symbols) {
+          max_tokens = codes->num_symbols;
+        }
+      }
+      tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
+      if (tokens == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+      }
+      for (i = 0; i < 5 * histogram_image_size; ++i) {
+        HuffmanTreeCode* const codes = &huffman_codes[i];
+        StoreHuffmanCode(bw, huff_tree, tokens, codes);
+        ClearHuffmanTreeIfOnlyOneSymbol(codes);
       }
     }
-    tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens,
-                                               sizeof(*tokens));
-    if (tokens == NULL) {
-      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-      goto Error;
+    // Store actual literals.
+    hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
+    err = StoreImageToBitMask(bw, width, histogram_bits, refs_best,
+                              histogram_symbols, huffman_codes);
+    // Keep track of the smallest image so far.
+    if (lz77s_idx == 0 ||
+        VP8LBitWriterNumBytes(bw) < VP8LBitWriterNumBytes(&bw_best)) {
+      *hdr_size = hdr_size_tmp;
+      *data_size =
+          (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size);
+      VP8LBitWriterSwap(bw, &bw_best);
     }
-    for (i = 0; i < 5 * histogram_image_size; ++i) {
-      HuffmanTreeCode* const codes = &huffman_codes[i];
-      StoreHuffmanCode(bw, huff_tree, tokens, codes);
-      ClearHuffmanTreeIfOnlyOneSymbol(codes);
+    // Reset the bit writer for the following iteration if any.
+    if (config->lz77s_types_to_try_size_ > 1) VP8LBitWriterReset(&bw_init, bw);
+    WebPSafeFree(tokens);
+    tokens = NULL;
+    if (huffman_codes != NULL) {
+      WebPSafeFree(huffman_codes->codes);
+      WebPSafeFree(huffman_codes);
+      huffman_codes = NULL;
     }
   }
-
-  *hdr_size = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
-  // Store actual literals.
-  err = StoreImageToBitMask(bw, width, histogram_bits, &refs,
-                            histogram_symbols, huffman_codes);
-  *data_size =
-        (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size);
+  VP8LBitWriterSwap(bw, &bw_best);
 
  Error:
   WebPSafeFree(tokens);
   WebPSafeFree(huff_tree);
   VP8LFreeHistogramSet(histogram_image);
-  VP8LFreeHistogramSet(tmp_histos);
-  VP8LBackwardRefsClear(&refs);
+  VP8LFreeHistogram(tmp_histo);
   if (huffman_codes != NULL) {
     WebPSafeFree(huffman_codes->codes);
     WebPSafeFree(huffman_codes);
   }
   WebPSafeFree(histogram_symbols);
+  VP8LBitWriterWipeOut(&bw_best);
   return err;
 }
 
@@ -1005,11 +1095,11 @@ static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc,
   VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
   assert(pred_bits >= 2);
   VP8LPutBits(bw, pred_bits - 2, 3);
-  return EncodeImageNoHuffman(bw, enc->transform_data_,
-                              (VP8LHashChain*)&enc->hash_chain_,
-                              (VP8LBackwardRefs*)enc->refs_,  // cast const away
-                              transform_width, transform_height,
-                              quality, low_effort);
+  return EncodeImageNoHuffman(
+      bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
+      (VP8LBackwardRefs*)&enc->refs_[0],  // cast const away
+      (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height,
+      quality, low_effort);
 }
 
 static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc,
@@ -1026,11 +1116,11 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc,
   VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2);
   assert(ccolor_transform_bits >= 2);
   VP8LPutBits(bw, ccolor_transform_bits - 2, 3);
-  return EncodeImageNoHuffman(bw, enc->transform_data_,
-                              (VP8LHashChain*)&enc->hash_chain_,
-                              (VP8LBackwardRefs*)enc->refs_,  // cast const away
-                              transform_width, transform_height,
-                              quality, low_effort);
+  return EncodeImageNoHuffman(
+      bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
+      (VP8LBackwardRefs*)&enc->refs_[0],  // cast const away
+      (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height,
+      quality, low_effort);
 }
 
 // -----------------------------------------------------------------------------
@@ -1144,6 +1234,7 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
     }
     enc->transform_mem_ = mem;
     enc->transform_mem_size_ = (size_t)mem_size;
+    enc->argb_content_ = kEncoderNone;
   }
   enc->argb_ = mem;
   mem = (uint32_t*)WEBP_ALIGN(mem + image_size);
@@ -1164,11 +1255,13 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) {
   int y;
   err = AllocateTransformBuffer(enc, width, height);
   if (err != VP8_ENC_OK) return err;
+  if (enc->argb_content_ == kEncoderARGB) return VP8_ENC_OK;
   for (y = 0; y < height; ++y) {
     memcpy(enc->argb_ + y * width,
            picture->argb + y * picture->argb_stride,
            width * sizeof(*enc->argb_));
   }
+  enc->argb_content_ = kEncoderARGB;
   assert(enc->current_width_ == width);
   return VP8_ENC_OK;
 }
@@ -1215,12 +1308,13 @@ static WEBP_INLINE uint32_t ApplyPaletteHash0(uint32_t color) {
 
 static WEBP_INLINE uint32_t ApplyPaletteHash1(uint32_t color) {
   // Forget about alpha.
-  return ((color & 0x00ffffffu) * 4222244071u) >> (32 - PALETTE_INV_SIZE_BITS);
+  return ((uint32_t)((color & 0x00ffffffu) * 4222244071ull)) >>
+         (32 - PALETTE_INV_SIZE_BITS);
 }
 
 static WEBP_INLINE uint32_t ApplyPaletteHash2(uint32_t color) {
   // Forget about alpha.
-  return (color & 0x00ffffffu) * ((1u << 31) - 1) >>
+  return ((uint32_t)((color & 0x00ffffffu) * ((1ull << 31) - 1))) >>
          (32 - PALETTE_INV_SIZE_BITS);
 }
 
@@ -1346,6 +1440,7 @@ static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc,
   err = ApplyPalette(src, src_stride,
                      enc->argb_, enc->current_width_,
                      palette, palette_size, width, height, xbits);
+  enc->argb_content_ = kEncoderPalette;
   return err;
 }
 
@@ -1364,8 +1459,9 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort,
     tmp_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
   }
   tmp_palette[0] = palette[0];
-  return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, enc->refs_,
-                              palette_size, 1, 20 /* quality */, low_effort);
+  return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_,
+                              &enc->refs_[0], &enc->refs_[1], palette_size, 1,
+                              20 /* quality */, low_effort);
 }
 
 #ifdef WEBP_EXPERIMENTAL_FEATURES
@@ -1400,10 +1496,11 @@ static WebPEncodingError EncodeDeltaPalettePredictorImage(
   VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
   VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
   VP8LPutBits(bw, pred_bits - 2, 3);
-  err = EncodeImageNoHuffman(bw, predictors, &enc->hash_chain_,
-                             (VP8LBackwardRefs*)enc->refs_,  // cast const away
-                             transform_width, transform_height,
-                             quality, low_effort);
+  err = EncodeImageNoHuffman(
+      bw, predictors, &enc->hash_chain_,
+      (VP8LBackwardRefs*)&enc->refs_[0],  // cast const away
+      (VP8LBackwardRefs*)&enc->refs_[1],
+      transform_width, transform_height, quality, low_effort);
   WebPSafeFree(predictors);
   return err;
 }
@@ -1422,6 +1519,7 @@ static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config,
   }
   enc->config_ = config;
   enc->pic_ = picture;
+  enc->argb_content_ = kEncoderNone;
 
   VP8LEncDspInit();
 
@@ -1430,9 +1528,9 @@ static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config,
 
 static void VP8LEncoderDelete(VP8LEncoder* enc) {
   if (enc != NULL) {
+    int i;
     VP8LHashChainClear(&enc->hash_chain_);
-    VP8LBackwardRefsClear(&enc->refs_[0]);
-    VP8LBackwardRefsClear(&enc->refs_[1]);
+    for (i = 0; i < 3; ++i) VP8LBackwardRefsClear(&enc->refs_[i]);
     ClearTransformBuffer(enc);
     WebPSafeFree(enc);
   }
@@ -1441,134 +1539,347 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) {
 // -----------------------------------------------------------------------------
 // Main call
 
-WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
-                                   const WebPPicture* const picture,
-                                   VP8LBitWriter* const bw, int use_cache) {
+typedef struct {
+  const WebPConfig* config_;
+  const WebPPicture* picture_;
+  VP8LBitWriter* bw_;
+  VP8LEncoder* enc_;
+  int use_cache_;
+  CrunchConfig crunch_configs_[CRUNCH_CONFIGS_MAX];
+  int num_crunch_configs_;
+  int red_and_blue_always_zero_;
+  WebPEncodingError err_;
+  WebPAuxStats* stats_;
+} StreamEncodeContext;
+
+static int EncodeStreamHook(void* input, void* data2) {
+  StreamEncodeContext* const params = (StreamEncodeContext*)input;
+  const WebPConfig* const config = params->config_;
+  const WebPPicture* const picture = params->picture_;
+  VP8LBitWriter* const bw = params->bw_;
+  VP8LEncoder* const enc = params->enc_;
+  const int use_cache = params->use_cache_;
+  const CrunchConfig* const crunch_configs = params->crunch_configs_;
+  const int num_crunch_configs = params->num_crunch_configs_;
+  const int red_and_blue_always_zero = params->red_and_blue_always_zero_;
+#if !defined(WEBP_DISABLE_STATS)
+  WebPAuxStats* const stats = params->stats_;
+#endif
   WebPEncodingError err = VP8_ENC_OK;
   const int quality = (int)config->quality;
   const int low_effort = (config->method == 0);
+#if (WEBP_NEAR_LOSSLESS == 1) || defined(WEBP_EXPERIMENTAL_FEATURES)
   const int width = picture->width;
+#endif
   const int height = picture->height;
-  VP8LEncoder* const enc = VP8LEncoderNew(config, picture);
   const size_t byte_position = VP8LBitWriterNumBytes(bw);
+#if (WEBP_NEAR_LOSSLESS == 1)
   int use_near_lossless = 0;
+#endif
   int hdr_size = 0;
   int data_size = 0;
   int use_delta_palette = 0;
+  int idx;
+  size_t best_size = 0;
+  VP8LBitWriter bw_init = *bw, bw_best;
+  (void)data2;
 
-  if (enc == NULL) {
-    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-    goto Error;
-  }
-
-  // ---------------------------------------------------------------------------
-  // Analyze image (entropy, num_palettes etc)
-
-  if (!AnalyzeAndInit(enc)) {
+  if (!VP8LBitWriterInit(&bw_best, 0) ||
+      (num_crunch_configs > 1 && !VP8LBitWriterClone(bw, &bw_best))) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
 
-  // Apply near-lossless preprocessing.
-  use_near_lossless =
-      (config->near_lossless < 100) && !enc->use_palette_ && !enc->use_predict_;
-  if (use_near_lossless) {
-    if (!VP8ApplyNearLossless(width, height, picture->argb,
-                              config->near_lossless)) {
-      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-      goto Error;
+  for (idx = 0; idx < num_crunch_configs; ++idx) {
+    const int entropy_idx = crunch_configs[idx].entropy_idx_;
+    enc->use_palette_ = (entropy_idx == kPalette);
+    enc->use_subtract_green_ =
+        (entropy_idx == kSubGreen) || (entropy_idx == kSpatialSubGreen);
+    enc->use_predict_ =
+        (entropy_idx == kSpatial) || (entropy_idx == kSpatialSubGreen);
+    if (low_effort) {
+      enc->use_cross_color_ = 0;
+    } else {
+      enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_;
     }
-  }
+    // Reset any parameter in the encoder that is set in the previous iteration.
+    enc->cache_bits_ = 0;
+    VP8LBackwardRefsClear(&enc->refs_[0]);
+    VP8LBackwardRefsClear(&enc->refs_[1]);
 
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-  if (config->use_delta_palette) {
-    enc->use_predict_ = 1;
-    enc->use_cross_color_ = 0;
-    enc->use_subtract_green_ = 0;
-    enc->use_palette_ = 1;
-    err = MakeInputImageCopy(enc);
-    if (err != VP8_ENC_OK) goto Error;
-    err = WebPSearchOptimalDeltaPalette(enc);
-    if (err != VP8_ENC_OK) goto Error;
-    if (enc->use_palette_) {
+#if (WEBP_NEAR_LOSSLESS == 1)
+    // Apply near-lossless preprocessing.
+    use_near_lossless = (config->near_lossless < 100) && !enc->use_palette_ &&
+                        !enc->use_predict_;
+    if (use_near_lossless) {
       err = AllocateTransformBuffer(enc, width, height);
       if (err != VP8_ENC_OK) goto Error;
-      err = EncodeDeltaPalettePredictorImage(bw, enc, quality, low_effort);
+      if ((enc->argb_content_ != kEncoderNearLossless) &&
+          !VP8ApplyNearLossless(picture, config->near_lossless, enc->argb_)) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+      }
+      enc->argb_content_ = kEncoderNearLossless;
+    } else {
+      enc->argb_content_ = kEncoderNone;
+    }
+#else
+    enc->argb_content_ = kEncoderNone;
+#endif
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    if (config->use_delta_palette) {
+      enc->use_predict_ = 1;
+      enc->use_cross_color_ = 0;
+      enc->use_subtract_green_ = 0;
+      enc->use_palette_ = 1;
+      if (enc->argb_content_ != kEncoderNearLossless &&
+          enc->argb_content_ != kEncoderPalette) {
+        err = MakeInputImageCopy(enc);
+        if (err != VP8_ENC_OK) goto Error;
+      }
+      err = WebPSearchOptimalDeltaPalette(enc);
       if (err != VP8_ENC_OK) goto Error;
-      use_delta_palette = 1;
+      if (enc->use_palette_) {
+        err = AllocateTransformBuffer(enc, width, height);
+        if (err != VP8_ENC_OK) goto Error;
+        err = EncodeDeltaPalettePredictorImage(bw, enc, quality, low_effort);
+        if (err != VP8_ENC_OK) goto Error;
+        use_delta_palette = 1;
+      }
     }
-  }
 #endif  // WEBP_EXPERIMENTAL_FEATURES
 
-  // Encode palette
-  if (enc->use_palette_) {
-    err = EncodePalette(bw, low_effort, enc);
-    if (err != VP8_ENC_OK) goto Error;
-    err = MapImageFromPalette(enc, use_delta_palette);
-    if (err != VP8_ENC_OK) goto Error;
-    // If using a color cache, do not have it bigger than the number of colors.
-    if (use_cache && enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) {
-      enc->cache_bits_ = BitsLog2Floor(enc->palette_size_) + 1;
-    }
-  }
-  if (!use_delta_palette) {
-    // In case image is not packed.
-    if (enc->argb_ == NULL) {
-      err = MakeInputImageCopy(enc);
+    // Encode palette
+    if (enc->use_palette_) {
+      err = EncodePalette(bw, low_effort, enc);
+      if (err != VP8_ENC_OK) goto Error;
+      err = MapImageFromPalette(enc, use_delta_palette);
       if (err != VP8_ENC_OK) goto Error;
+      // If using a color cache, do not have it bigger than the number of
+      // colors.
+      if (use_cache && enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) {
+        enc->cache_bits_ = BitsLog2Floor(enc->palette_size_) + 1;
+      }
     }
+    if (!use_delta_palette) {
+      // In case image is not packed.
+      if (enc->argb_content_ != kEncoderNearLossless &&
+          enc->argb_content_ != kEncoderPalette) {
+        err = MakeInputImageCopy(enc);
+        if (err != VP8_ENC_OK) goto Error;
+      }
 
-    // -------------------------------------------------------------------------
-    // Apply transforms and write transform data.
+      // -----------------------------------------------------------------------
+      // Apply transforms and write transform data.
 
-    if (enc->use_subtract_green_) {
-      ApplySubtractGreen(enc, enc->current_width_, height, bw);
-    }
+      if (enc->use_subtract_green_) {
+        ApplySubtractGreen(enc, enc->current_width_, height, bw);
+      }
 
-    if (enc->use_predict_) {
-      err = ApplyPredictFilter(enc, enc->current_width_, height, quality,
-                               low_effort, enc->use_subtract_green_, bw);
-      if (err != VP8_ENC_OK) goto Error;
+      if (enc->use_predict_) {
+        err = ApplyPredictFilter(enc, enc->current_width_, height, quality,
+                                 low_effort, enc->use_subtract_green_, bw);
+        if (err != VP8_ENC_OK) goto Error;
+      }
+
+      if (enc->use_cross_color_) {
+        err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality,
+                                    low_effort, bw);
+        if (err != VP8_ENC_OK) goto Error;
+      }
     }
 
-    if (enc->use_cross_color_) {
-      err = ApplyCrossColorFilter(enc, enc->current_width_,
-                                  height, quality, low_effort, bw);
-      if (err != VP8_ENC_OK) goto Error;
+    VP8LPutBits(bw, !TRANSFORM_PRESENT, 1);  // No more transforms.
+
+    // -------------------------------------------------------------------------
+    // Encode and write the transformed image.
+    err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_,
+                              enc->current_width_, height, quality, low_effort,
+                              use_cache, &crunch_configs[idx],
+                              &enc->cache_bits_, enc->histo_bits_,
+                              byte_position, &hdr_size, &data_size);
+    if (err != VP8_ENC_OK) goto Error;
+
+    // If we are better than what we already have.
+    if (idx == 0 || VP8LBitWriterNumBytes(bw) < best_size) {
+      best_size = VP8LBitWriterNumBytes(bw);
+      // Store the BitWriter.
+      VP8LBitWriterSwap(bw, &bw_best);
+#if !defined(WEBP_DISABLE_STATS)
+      // Update the stats.
+      if (stats != NULL) {
+        stats->lossless_features = 0;
+        if (enc->use_predict_) stats->lossless_features |= 1;
+        if (enc->use_cross_color_) stats->lossless_features |= 2;
+        if (enc->use_subtract_green_) stats->lossless_features |= 4;
+        if (enc->use_palette_) stats->lossless_features |= 8;
+        stats->histogram_bits = enc->histo_bits_;
+        stats->transform_bits = enc->transform_bits_;
+        stats->cache_bits = enc->cache_bits_;
+        stats->palette_size = enc->palette_size_;
+        stats->lossless_size = (int)(best_size - byte_position);
+        stats->lossless_hdr_size = hdr_size;
+        stats->lossless_data_size = data_size;
+      }
+#endif
     }
+    // Reset the bit writer for the following iteration if any.
+    if (num_crunch_configs > 1) VP8LBitWriterReset(&bw_init, bw);
   }
+  VP8LBitWriterSwap(&bw_best, bw);
 
-  VP8LPutBits(bw, !TRANSFORM_PRESENT, 1);  // No more transforms.
+Error:
+  VP8LBitWriterWipeOut(&bw_best);
+  params->err_ = err;
+  // The hook should return false in case of error.
+  return (err == VP8_ENC_OK);
+}
 
-  // ---------------------------------------------------------------------------
-  // Encode and write the transformed image.
-  err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_,
-                            enc->current_width_, height, quality, low_effort,
-                            use_cache, &enc->cache_bits_, enc->histo_bits_,
-                            byte_position, &hdr_size, &data_size);
-  if (err != VP8_ENC_OK) goto Error;
+WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
+                                   const WebPPicture* const picture,
+                                   VP8LBitWriter* const bw_main,
+                                   int use_cache) {
+  WebPEncodingError err = VP8_ENC_OK;
+  VP8LEncoder* const enc_main = VP8LEncoderNew(config, picture);
+  VP8LEncoder* enc_side = NULL;
+  CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX];
+  int num_crunch_configs_main, num_crunch_configs_side = 0;
+  int idx;
+  int red_and_blue_always_zero = 0;
+  WebPWorker worker_main, worker_side;
+  StreamEncodeContext params_main, params_side;
+  // The main thread uses picture->stats, the side thread uses stats_side.
+  WebPAuxStats stats_side;
+  VP8LBitWriter bw_side;
+  const WebPWorkerInterface* const worker_interface = WebPGetWorkerInterface();
+  int ok_main;
 
-  if (picture->stats != NULL) {
-    WebPAuxStats* const stats = picture->stats;
-    stats->lossless_features = 0;
-    if (enc->use_predict_) stats->lossless_features |= 1;
-    if (enc->use_cross_color_) stats->lossless_features |= 2;
-    if (enc->use_subtract_green_) stats->lossless_features |= 4;
-    if (enc->use_palette_) stats->lossless_features |= 8;
-    stats->histogram_bits = enc->histo_bits_;
-    stats->transform_bits = enc->transform_bits_;
-    stats->cache_bits = enc->cache_bits_;
-    stats->palette_size = enc->palette_size_;
-    stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position);
-    stats->lossless_hdr_size = hdr_size;
-    stats->lossless_data_size = data_size;
+  // Analyze image (entropy, num_palettes etc)
+  if (enc_main == NULL ||
+      !EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main,
+                      &red_and_blue_always_zero) ||
+      !EncoderInit(enc_main) || !VP8LBitWriterInit(&bw_side, 0)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
   }
 
- Error:
-  VP8LEncoderDelete(enc);
+  // Split the configs between the main and side threads (if any).
+  if (config->thread_level > 0) {
+    num_crunch_configs_side = num_crunch_configs_main / 2;
+    for (idx = 0; idx < num_crunch_configs_side; ++idx) {
+      params_side.crunch_configs_[idx] =
+          crunch_configs[num_crunch_configs_main - num_crunch_configs_side +
+                         idx];
+    }
+    params_side.num_crunch_configs_ = num_crunch_configs_side;
+  }
+  num_crunch_configs_main -= num_crunch_configs_side;
+  for (idx = 0; idx < num_crunch_configs_main; ++idx) {
+    params_main.crunch_configs_[idx] = crunch_configs[idx];
+  }
+  params_main.num_crunch_configs_ = num_crunch_configs_main;
+
+  // Fill in the parameters for the thread workers.
+  {
+    const int params_size = (num_crunch_configs_side > 0) ? 2 : 1;
+    for (idx = 0; idx < params_size; ++idx) {
+      // Create the parameters for each worker.
+      WebPWorker* const worker = (idx == 0) ? &worker_main : &worker_side;
+      StreamEncodeContext* const param =
+          (idx == 0) ? &params_main : &params_side;
+      param->config_ = config;
+      param->picture_ = picture;
+      param->use_cache_ = use_cache;
+      param->red_and_blue_always_zero_ = red_and_blue_always_zero;
+      if (idx == 0) {
+        param->stats_ = picture->stats;
+        param->bw_ = bw_main;
+        param->enc_ = enc_main;
+      } else {
+        param->stats_ = (picture->stats == NULL) ? NULL : &stats_side;
+        // Create a side bit writer.
+        if (!VP8LBitWriterClone(bw_main, &bw_side)) {
+          err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+          goto Error;
+        }
+        param->bw_ = &bw_side;
+        // Create a side encoder.
+        enc_side = VP8LEncoderNew(config, picture);
+        if (enc_side == NULL || !EncoderInit(enc_side)) {
+          err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+          goto Error;
+        }
+        // Copy the values that were computed for the main encoder.
+        enc_side->histo_bits_ = enc_main->histo_bits_;
+        enc_side->transform_bits_ = enc_main->transform_bits_;
+        enc_side->palette_size_ = enc_main->palette_size_;
+        memcpy(enc_side->palette_, enc_main->palette_,
+               sizeof(enc_main->palette_));
+        param->enc_ = enc_side;
+      }
+      // Create the workers.
+      worker_interface->Init(worker);
+      worker->data1 = param;
+      worker->data2 = NULL;
+      worker->hook = (WebPWorkerHook)EncodeStreamHook;
+    }
+  }
+
+  // Start the second thread if needed.
+  if (num_crunch_configs_side != 0) {
+    if (!worker_interface->Reset(&worker_side)) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
+#if !defined(WEBP_DISABLE_STATS)
+    // This line is here and not in the param initialization above to remove a
+    // Clang static analyzer warning.
+    if (picture->stats != NULL) {
+      memcpy(&stats_side, picture->stats, sizeof(stats_side));
+    }
+#endif
+    // This line is only useful to remove a Clang static analyzer warning.
+    params_side.err_ = VP8_ENC_OK;
+    worker_interface->Launch(&worker_side);
+  }
+  // Execute the main thread.
+  worker_interface->Execute(&worker_main);
+  ok_main = worker_interface->Sync(&worker_main);
+  worker_interface->End(&worker_main);
+  if (num_crunch_configs_side != 0) {
+    // Wait for the second thread.
+    const int ok_side = worker_interface->Sync(&worker_side);
+    worker_interface->End(&worker_side);
+    if (!ok_main || !ok_side) {
+      err = ok_main ? params_side.err_ : params_main.err_;
+      goto Error;
+    }
+    if (VP8LBitWriterNumBytes(&bw_side) < VP8LBitWriterNumBytes(bw_main)) {
+      VP8LBitWriterSwap(bw_main, &bw_side);
+#if !defined(WEBP_DISABLE_STATS)
+      if (picture->stats != NULL) {
+        memcpy(picture->stats, &stats_side, sizeof(*picture->stats));
+      }
+#endif
+    }
+  } else {
+    if (!ok_main) {
+      err = params_main.err_;
+      goto Error;
+    }
+  }
+
+Error:
+  VP8LBitWriterWipeOut(&bw_side);
+  VP8LEncoderDelete(enc_main);
+  VP8LEncoderDelete(enc_side);
   return err;
 }
 
+#undef CRUNCH_CONFIGS_MAX
+#undef CRUNCH_CONFIGS_LZ77_MAX
+
 int VP8LEncodeImage(const WebPConfig* const config,
                     const WebPPicture* const picture) {
   int width, height;
@@ -1642,11 +1953,13 @@ int VP8LEncodeImage(const WebPConfig* const config,
 
   if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort;
 
+#if !defined(WEBP_DISABLE_STATS)
   // Save size.
   if (picture->stats != NULL) {
     picture->stats->coded_size += (int)coded_size;
     picture->stats->lossless_size = (int)coded_size;
   }
+#endif
 
   if (picture->extra_info != NULL) {
     const int mb_w = (width + 15) >> 4;
diff --git a/thirdparty/libwebp/enc/vp8li_enc.h b/thirdparty/libwebp/src/enc/vp8li_enc.h
index 8c5fbcbb2e..298a4a0014 100644
--- a/thirdparty/libwebp/enc/vp8li_enc.h
+++ b/thirdparty/libwebp/src/enc/vp8li_enc.h
@@ -11,14 +11,23 @@
 //
 // Author: Vikas Arora (vikaas.arora@gmail.com)
 
-#ifndef WEBP_ENC_VP8LI_H_
-#define WEBP_ENC_VP8LI_H_
+#ifndef WEBP_ENC_VP8LI_ENC_H_
+#define WEBP_ENC_VP8LI_ENC_H_
 
-#include "./backward_references_enc.h"
-#include "./histogram_enc.h"
-#include "../utils/bit_writer_utils.h"
-#include "../webp/encode.h"
-#include "../webp/format_constants.h"
+#ifdef HAVE_CONFIG_H
+#include "src/webp/config.h"
+#endif
+// Either WEBP_NEAR_LOSSLESS is defined as 0 in config.h when compiling to
+// disable near-lossless, or it is enabled by default.
+#ifndef WEBP_NEAR_LOSSLESS
+#define WEBP_NEAR_LOSSLESS 1
+#endif
+
+#include "src/enc/backward_references_enc.h"
+#include "src/enc/histogram_enc.h"
+#include "src/utils/bit_writer_utils.h"
+#include "src/webp/encode.h"
+#include "src/webp/format_constants.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -27,16 +36,24 @@ extern "C" {
 // maximum value of transform_bits_ in VP8LEncoder.
 #define MAX_TRANSFORM_BITS 6
 
+typedef enum {
+  kEncoderNone = 0,
+  kEncoderARGB,
+  kEncoderNearLossless,
+  kEncoderPalette
+} VP8LEncoderARGBContent;
+
 typedef struct {
   const WebPConfig* config_;      // user configuration and parameters
   const WebPPicture* pic_;        // input picture.
 
-  uint32_t* argb_;                // Transformed argb image data.
-  uint32_t* argb_scratch_;        // Scratch memory for argb rows
-                                  // (used for prediction).
-  uint32_t* transform_data_;      // Scratch memory for transform data.
-  uint32_t* transform_mem_;       // Currently allocated memory.
-  size_t    transform_mem_size_;  // Currently allocated memory size.
+  uint32_t* argb_;                       // Transformed argb image data.
+  VP8LEncoderARGBContent argb_content_;  // Content type of the argb buffer.
+  uint32_t* argb_scratch_;               // Scratch memory for argb rows
+                                         // (used for prediction).
+  uint32_t* transform_data_;             // Scratch memory for transform data.
+  uint32_t* transform_mem_;              // Currently allocated memory.
+  size_t    transform_mem_size_;         // Currently allocated memory size.
 
   int       current_width_;       // Corresponds to packed image width.
 
@@ -54,8 +71,7 @@ typedef struct {
   uint32_t palette_[MAX_PALETTE_SIZE];
 
   // Some 'scratch' (potentially large) objects.
-  struct VP8LBackwardRefs refs_[2];  // Backward Refs array corresponding to
-                                     // LZ77 & RLE coding.
+  struct VP8LBackwardRefs refs_[3];  // Backward Refs array for temporaries.
   VP8LHashChain hash_chain_;         // HashChain data for constructing
                                      // backward references.
 } VP8LEncoder;
@@ -75,6 +91,13 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
                                    const WebPPicture* const picture,
                                    VP8LBitWriter* const bw, int use_cache);
 
+#if (WEBP_NEAR_LOSSLESS == 1)
+// in near_lossless.c
+// Near lossless preprocessing in RGB color-space.
+int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,
+                         uint32_t* const argb_dst);
+#endif
+
 //------------------------------------------------------------------------------
 // Image transforms in predictor.c.
 
@@ -92,4 +115,4 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_ENC_VP8LI_H_ */
+#endif  /* WEBP_ENC_VP8LI_ENC_H_ */
diff --git a/thirdparty/libwebp/enc/webp_enc.c b/thirdparty/libwebp/src/enc/webp_enc.c
index f18461ef92..283cda8e7b 100644
--- a/thirdparty/libwebp/enc/webp_enc.c
+++ b/thirdparty/libwebp/src/enc/webp_enc.c
@@ -16,10 +16,10 @@
 #include <string.h>
 #include <math.h>
 
-#include "./cost_enc.h"
-#include "./vp8i_enc.h"
-#include "./vp8li_enc.h"
-#include "../utils/utils.h"
+#include "src/enc/cost_enc.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/enc/vp8li_enc.h"
+#include "src/utils/utils.h"
 
 // #define PRINT_MEMORY_INFO
 
@@ -207,7 +207,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
   enc->preds_w_ = preds_w;
   enc->mb_info_ = (VP8MBInfo*)mem;
   mem += info_size;
-  enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_;
+  enc->preds_ = mem + 1 + enc->preds_w_;
   mem += preds_size;
   enc->nz_ = 1 + (uint32_t*)WEBP_ALIGN(mem);
   mem += nz_size;
@@ -216,7 +216,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
 
   // top samples (all 16-aligned)
   mem = (uint8_t*)WEBP_ALIGN(mem);
-  enc->y_top_ = (uint8_t*)mem;
+  enc->y_top_ = mem;
   enc->uv_top_ = enc->y_top_ + top_stride;
   mem += 2 * top_stride;
   assert(mem <= (uint8_t*)enc + size);
@@ -256,6 +256,7 @@ static int DeleteVP8Encoder(VP8Encoder* enc) {
 
 //------------------------------------------------------------------------------
 
+#if !defined(WEBP_DISABLE_STATS)
 static double GetPSNR(uint64_t err, uint64_t size) {
   return (err > 0 && size > 0) ? 10. * log10(255. * 255. * size / err) : 99.;
 }
@@ -270,8 +271,10 @@ static void FinalizePSNR(const VP8Encoder* const enc) {
   stats->PSNR[3] = (float)GetPSNR(sse[0] + sse[1] + sse[2], size * 3 / 2);
   stats->PSNR[4] = (float)GetPSNR(sse[3], size);
 }
+#endif  // !defined(WEBP_DISABLE_STATS)
 
 static void StoreStats(VP8Encoder* const enc) {
+#if !defined(WEBP_DISABLE_STATS)
   WebPAuxStats* const stats = enc->pic_->stats;
   if (stats != NULL) {
     int i, s;
@@ -288,7 +291,9 @@ static void StoreStats(VP8Encoder* const enc) {
       stats->block_count[i] = enc->block_count_[i];
     }
   }
+#else  // defined(WEBP_DISABLE_STATS)
   WebPReportProgress(enc->pic_, 100, &enc->percent_);  // done!
+#endif  // !defined(WEBP_DISABLE_STATS)
 }
 
 int WebPEncodingSetError(const WebPPicture* const pic,
@@ -336,10 +341,6 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
   if (!config->lossless) {
     VP8Encoder* enc = NULL;
 
-    if (!config->exact) {
-      WebPCleanupTransparentArea(pic);
-    }
-
     if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) {
       // Make sure we have YUVA samples.
       if (config->use_sharp_yuv || (config->preprocessing & 4)) {
@@ -361,6 +362,10 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
       }
     }
 
+    if (!config->exact) {
+      WebPCleanupTransparentArea(pic);
+    }
+
     enc = InitVP8Encoder(config, pic);
     if (enc == NULL) return 0;  // pic->error is already set.
     // Note: each of the tasks below account for 20% in the progress report.
diff --git a/thirdparty/libwebp/mux/anim_encode.c b/thirdparty/libwebp/src/mux/anim_encode.c
index 6066388727..7be99068f6 100644
--- a/thirdparty/libwebp/mux/anim_encode.c
+++ b/thirdparty/libwebp/src/mux/anim_encode.c
@@ -16,12 +16,12 @@
 #include <stdio.h>
 #include <stdlib.h>  // for abs()
 
-#include "../mux/animi.h"
-#include "../utils/utils.h"
-#include "../webp/decode.h"
-#include "../webp/encode.h"
-#include "../webp/format_constants.h"
-#include "../webp/mux.h"
+#include "src/mux/animi.h"
+#include "src/utils/utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/encode.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/mux.h"
 
 #if defined(_MSC_VER) && _MSC_VER < 1900
 #define snprintf _snprintf
@@ -35,7 +35,7 @@
 // Stores frame rectangle dimensions.
 typedef struct {
   int x_offset_, y_offset_, width_, height_;
-} FrameRect;
+} FrameRectangle;
 
 // Used to store two candidates of encoded data for an animation frame. One of
 // the two will be chosen later.
@@ -50,7 +50,7 @@ struct WebPAnimEncoder {
   const int canvas_height_;                 // Canvas height.
   const WebPAnimEncoderOptions options_;    // Global encoding options.
 
-  FrameRect prev_rect_;               // Previous WebP frame rectangle.
+  FrameRectangle prev_rect_;          // Previous WebP frame rectangle.
   WebPConfig last_config_;            // Cached in case a re-encode is needed.
   WebPConfig last_config_reversed_;   // If 'last_config_' uses lossless, then
                                       // this config uses lossy and vice versa;
@@ -206,7 +206,7 @@ static void ClearRectangle(WebPPicture* const picture,
 }
 
 static void WebPUtilClearPic(WebPPicture* const picture,
-                             const FrameRect* const rect) {
+                             const FrameRectangle* const rect) {
   if (rect != NULL) {
     ClearRectangle(picture, rect->x_offset_, rect->y_offset_,
                    rect->width_, rect->height_);
@@ -400,7 +400,7 @@ static WEBP_INLINE int ComparePixelsLossy(const uint32_t* src, int src_step,
   return 1;
 }
 
-static int IsEmptyRect(const FrameRect* const rect) {
+static int IsEmptyRect(const FrameRectangle* const rect) {
   return (rect->width_ == 0) || (rect->height_ == 0);
 }
 
@@ -413,7 +413,7 @@ static int QualityToMaxDiff(float quality) {
 // Assumes that an initial valid guess of change rectangle 'rect' is passed.
 static void MinimizeChangeRectangle(const WebPPicture* const src,
                                     const WebPPicture* const dst,
-                                    FrameRect* const rect,
+                                    FrameRectangle* const rect,
                                     int is_lossless, float quality) {
   int i, j;
   const ComparePixelsFunc compare_pixels =
@@ -498,7 +498,7 @@ static void MinimizeChangeRectangle(const WebPPicture* const src,
 }
 
 // Snap rectangle to even offsets (and adjust dimensions if needed).
-static WEBP_INLINE void SnapToEvenOffsets(FrameRect* const rect) {
+static WEBP_INLINE void SnapToEvenOffsets(FrameRectangle* const rect) {
   rect->width_ += (rect->x_offset_ & 1);
   rect->height_ += (rect->y_offset_ & 1);
   rect->x_offset_ &= ~1;
@@ -508,9 +508,9 @@ static WEBP_INLINE void SnapToEvenOffsets(FrameRect* const rect) {
 typedef struct {
   int should_try_;               // Should try this set of parameters.
   int empty_rect_allowed_;       // Frame with empty rectangle can be skipped.
-  FrameRect rect_ll_;            // Frame rectangle for lossless compression.
+  FrameRectangle rect_ll_;       // Frame rectangle for lossless compression.
   WebPPicture sub_frame_ll_;     // Sub-frame pic for lossless compression.
-  FrameRect rect_lossy_;         // Frame rectangle for lossy compression.
+  FrameRectangle rect_lossy_;    // Frame rectangle for lossy compression.
                                  // Could be smaller than rect_ll_ as pixels
                                  // with small diffs can be ignored.
   WebPPicture sub_frame_lossy_;  // Sub-frame pic for lossless compression.
@@ -538,7 +538,8 @@ static void SubFrameParamsFree(SubFrameParams* const params) {
 static int GetSubRect(const WebPPicture* const prev_canvas,
                       const WebPPicture* const curr_canvas, int is_key_frame,
                       int is_first_frame, int empty_rect_allowed,
-                      int is_lossless, float quality, FrameRect* const rect,
+                      int is_lossless, float quality,
+                      FrameRectangle* const rect,
                       WebPPicture* const sub_frame) {
   if (!is_key_frame || is_first_frame) {  // Optimize frame rectangle.
     // Note: This behaves as expected for first frame, as 'prev_canvas' is
@@ -594,7 +595,7 @@ int WebPAnimEncoderRefineRect(
     const WebPPicture* const prev_canvas, const WebPPicture* const curr_canvas,
     int is_lossless, float quality, int* const x_offset, int* const y_offset,
     int* const width, int* const height) {
-  FrameRect rect;
+  FrameRectangle rect;
   const int right = clip(*x_offset + *width, 0, curr_canvas->width);
   const int left = clip(*x_offset, 0, curr_canvas->width - 1);
   const int bottom = clip(*y_offset + *height, 0, curr_canvas->height);
@@ -620,7 +621,7 @@ int WebPAnimEncoderRefineRect(
 }
 
 static void DisposeFrameRectangle(int dispose_method,
-                                  const FrameRect* const rect,
+                                  const FrameRectangle* const rect,
                                   WebPPicture* const curr_canvas) {
   assert(rect != NULL);
   if (dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
@@ -628,13 +629,13 @@ static void DisposeFrameRectangle(int dispose_method,
   }
 }
 
-static uint32_t RectArea(const FrameRect* const rect) {
+static uint32_t RectArea(const FrameRectangle* const rect) {
   return (uint32_t)rect->width_ * rect->height_;
 }
 
 static int IsLosslessBlendingPossible(const WebPPicture* const src,
                                       const WebPPicture* const dst,
-                                      const FrameRect* const rect) {
+                                      const FrameRectangle* const rect) {
   int i, j;
   assert(src->width == dst->width && src->height == dst->height);
   assert(rect->x_offset_ + rect->width_ <= dst->width);
@@ -656,7 +657,7 @@ static int IsLosslessBlendingPossible(const WebPPicture* const src,
 
 static int IsLossyBlendingPossible(const WebPPicture* const src,
                                    const WebPPicture* const dst,
-                                   const FrameRect* const rect,
+                                   const FrameRectangle* const rect,
                                    float quality) {
   const int max_allowed_diff_lossy = QualityToMaxDiff(quality);
   int i, j;
@@ -683,7 +684,7 @@ static int IsLossyBlendingPossible(const WebPPicture* const src,
 // transparent pixels.
 // Returns true if at least one pixel gets modified.
 static int IncreaseTransparency(const WebPPicture* const src,
-                                const FrameRect* const rect,
+                                const FrameRectangle* const rect,
                                 WebPPicture* const dst) {
   int i, j;
   int modified = 0;
@@ -709,7 +710,7 @@ static int IncreaseTransparency(const WebPPicture* const src,
 // Assumes lossy compression is being used.
 // Returns true if at least one pixel gets modified.
 static int FlattenSimilarBlocks(const WebPPicture* const src,
-                                const FrameRect* const rect,
+                                const FrameRectangle* const rect,
                                 WebPPicture* const dst, float quality) {
   const int max_allowed_diff_lossy = QualityToMaxDiff(quality);
   int i, j;
@@ -778,13 +779,13 @@ static int EncodeFrame(const WebPConfig* const config, WebPPicture* const pic,
 typedef struct {
   WebPMemoryWriter  mem_;
   WebPMuxFrameInfo  info_;
-  FrameRect         rect_;
+  FrameRectangle    rect_;
   int               evaluate_;  // True if this candidate should be evaluated.
 } Candidate;
 
 // Generates a candidate encoded frame given a picture and metadata.
 static WebPEncodingError EncodeCandidate(WebPPicture* const sub_frame,
-                                         const FrameRect* const rect,
+                                         const FrameRectangle* const rect,
                                          const WebPConfig* const encoder_config,
                                          int use_blending,
                                          Candidate* const candidate) {
@@ -958,7 +959,7 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) {
   if (new_duration >= MAX_DURATION) {  // Special case.
     // Separate out previous frame from earlier merged frames to avoid overflow.
     // We add a 1x1 transparent frame for the previous frame, with blending on.
-    const FrameRect rect = { 0, 0, 1, 1 };
+    const FrameRectangle rect = { 0, 0, 1, 1 };
     const uint8_t lossless_1x1_bytes[] = {
       0x52, 0x49, 0x46, 0x46, 0x14, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50,
       0x56, 0x50, 0x38, 0x4c, 0x08, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00,
@@ -1223,7 +1224,7 @@ static int CacheFrame(WebPAnimEncoder* const enc,
       enc->prev_candidate_undecided_ = 0;
     } else {
       int64_t curr_delta;
-      FrameRect prev_rect_key, prev_rect_sub;
+      FrameRectangle prev_rect_key, prev_rect_sub;
 
       // Add this as a frame rectangle to enc.
       error_code = SetFrame(enc, config, 0, encoded_frame, &frame_skipped);
@@ -1535,7 +1536,8 @@ int WebPAnimEncoderAssemble(WebPAnimEncoder* enc, WebPData* webp_data) {
 
   if (!enc->got_null_frame_ && enc->in_frame_count_ > 1 && enc->count_ > 0) {
     // set duration of the last frame to be avg of durations of previous frames.
-    const double delta_time = enc->prev_timestamp_ - enc->first_timestamp_;
+    const double delta_time =
+        (uint32_t)enc->prev_timestamp_ - enc->first_timestamp_;
     const int average_duration = (int)(delta_time / (enc->in_frame_count_ - 1));
     if (!IncreasePreviousDuration(enc, average_duration)) {
       return 0;
diff --git a/thirdparty/libwebp/mux/animi.h b/thirdparty/libwebp/src/mux/animi.h
index cecaf1fee5..88899532aa 100644
--- a/thirdparty/libwebp/mux/animi.h
+++ b/thirdparty/libwebp/src/mux/animi.h
@@ -14,7 +14,7 @@
 #ifndef WEBP_MUX_ANIMI_H_
 #define WEBP_MUX_ANIMI_H_
 
-#include "../webp/mux.h"
+#include "src/webp/mux.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/thirdparty/libwebp/mux/muxedit.c b/thirdparty/libwebp/src/mux/muxedit.c
index d2c5305372..7a027b3cb4 100644
--- a/thirdparty/libwebp/mux/muxedit.c
+++ b/thirdparty/libwebp/src/mux/muxedit.c
@@ -13,8 +13,8 @@
 //          Vikas (vikasa@google.com)
 
 #include <assert.h>
-#include "./muxi.h"
-#include "../utils/utils.h"
+#include "src/mux/muxi.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // Life of a mux object.
diff --git a/thirdparty/libwebp/mux/muxi.h b/thirdparty/libwebp/src/mux/muxi.h
index e6606aa5d1..b73e3fbd7a 100644
--- a/thirdparty/libwebp/mux/muxi.h
+++ b/thirdparty/libwebp/src/mux/muxi.h
@@ -15,9 +15,9 @@
 #define WEBP_MUX_MUXI_H_
 
 #include <stdlib.h>
-#include "../dec/vp8i_dec.h"
-#include "../dec/vp8li_dec.h"
-#include "../webp/mux.h"
+#include "src/dec/vp8i_dec.h"
+#include "src/dec/vp8li_dec.h"
+#include "src/webp/mux.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -28,7 +28,7 @@ extern "C" {
 
 #define MUX_MAJ_VERSION 0
 #define MUX_MIN_VERSION 4
-#define MUX_REV_VERSION 0
+#define MUX_REV_VERSION 1
 
 // Chunk object.
 typedef struct WebPChunk WebPChunk;
diff --git a/thirdparty/libwebp/mux/muxinternal.c b/thirdparty/libwebp/src/mux/muxinternal.c
index 387b57e8fe..1473f100e5 100644
--- a/thirdparty/libwebp/mux/muxinternal.c
+++ b/thirdparty/libwebp/src/mux/muxinternal.c
@@ -13,8 +13,8 @@
 //          Vikas (vikasa@google.com)
 
 #include <assert.h>
-#include "./muxi.h"
-#include "../utils/utils.h"
+#include "src/mux/muxi.h"
+#include "src/utils/utils.h"
 
 #define UNDEFINED_CHUNK_SIZE ((uint32_t)(-1))
 
@@ -504,6 +504,20 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
     if (!has_animation && (num_anim == 1 || num_frames > 0)) {
       return WEBP_MUX_INVALID_ARGUMENT;
     }
+    if (!has_animation) {
+      const WebPMuxImage* images = mux->images_;
+      // There can be only one image.
+      if (images == NULL || images->next_ != NULL) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+      }
+      // Size must match.
+      if (mux->canvas_width_ > 0) {
+        if (images->width_ != mux->canvas_width_ ||
+            images->height_ != mux->canvas_height_) {
+          return WEBP_MUX_INVALID_ARGUMENT;
+        }
+      }
+    }
   }
 
   // Verify either VP8X chunk is present OR there is only one elem in
@@ -515,6 +529,7 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
   if (num_vp8x == 0 && num_images != 1) return WEBP_MUX_INVALID_ARGUMENT;
 
   // ALPHA_FLAG & alpha chunk(s) are consistent.
+  // Note: ALPHA_FLAG can be set when there is actually no Alpha data present.
   if (MuxHasAlpha(mux->images_)) {
     if (num_vp8x > 0) {
       // VP8X chunk is present, so it should contain ALPHA_FLAG.
@@ -525,8 +540,6 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
       if (err != WEBP_MUX_OK) return err;
       if (num_alpha > 0) return WEBP_MUX_INVALID_ARGUMENT;
     }
-  } else {  // Mux doesn't need alpha. So, ALPHA_FLAG should NOT be present.
-    if (flags & ALPHA_FLAG) return WEBP_MUX_INVALID_ARGUMENT;
   }
 
   return WEBP_MUX_OK;
diff --git a/thirdparty/libwebp/mux/muxread.c b/thirdparty/libwebp/src/mux/muxread.c
index 410acd9119..0b55286862 100644
--- a/thirdparty/libwebp/mux/muxread.c
+++ b/thirdparty/libwebp/src/mux/muxread.c
@@ -13,8 +13,8 @@
 //          Vikas (vikasa@google.com)
 
 #include <assert.h>
-#include "./muxi.h"
-#include "../utils/utils.h"
+#include "src/mux/muxi.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // Helper method(s).
@@ -43,7 +43,7 @@ static WebPMuxError MuxGet(const WebPMux* const mux, CHUNK_INDEX idx,
   SWITCH_ID_LIST(IDX_ANIM, mux->anim_);
   SWITCH_ID_LIST(IDX_EXIF, mux->exif_);
   SWITCH_ID_LIST(IDX_XMP, mux->xmp_);
-  SWITCH_ID_LIST(IDX_UNKNOWN, mux->unknown_);
+  assert(idx != IDX_UNKNOWN);
   return WEBP_MUX_NOT_FOUND;
 }
 #undef SWITCH_ID_LIST
@@ -270,6 +270,9 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
     ChunkInit(&chunk);
   }
 
+  // Incomplete image.
+  if (wpi->is_partial_) goto Err;
+
   // Validate mux if complete.
   if (MuxValidate(mux) != WEBP_MUX_OK) goto Err;
 
diff --git a/thirdparty/libwebp/utils/bit_reader_inl_utils.h b/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h
index fd7fb0446c..2ccc6ed326 100644
--- a/thirdparty/libwebp/utils/bit_reader_inl_utils.h
+++ b/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h
@@ -13,19 +13,19 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_UTILS_BIT_READER_INL_H_
-#define WEBP_UTILS_BIT_READER_INL_H_
+#ifndef WEBP_UTILS_BIT_READER_INL_UTILS_H_
+#define WEBP_UTILS_BIT_READER_INL_UTILS_H_
 
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
 #include <string.h>  // for memcpy
 
-#include "../dsp/dsp.h"
-#include "./bit_reader_utils.h"
-#include "./endian_inl_utils.h"
-#include "./utils.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/bit_reader_utils.h"
+#include "src/utils/endian_inl_utils.h"
+#include "src/utils/utils.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -187,4 +187,4 @@ static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, int prob) {
 }    // extern "C"
 #endif
 
-#endif   // WEBP_UTILS_BIT_READER_INL_H_
+#endif   // WEBP_UTILS_BIT_READER_INL_UTILS_H_
diff --git a/thirdparty/libwebp/utils/bit_reader_utils.c b/thirdparty/libwebp/src/utils/bit_reader_utils.c
index 053b710bb8..5fa3ae7795 100644
--- a/thirdparty/libwebp/utils/bit_reader_utils.c
+++ b/thirdparty/libwebp/src/utils/bit_reader_utils.c
@@ -12,11 +12,11 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
-#include "./bit_reader_inl_utils.h"
-#include "../utils/utils.h"
+#include "src/utils/bit_reader_inl_utils.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // VP8BitReader
diff --git a/thirdparty/libwebp/utils/bit_reader_utils.h b/thirdparty/libwebp/src/utils/bit_reader_utils.h
index ea5c584eb4..04f9804409 100644
--- a/thirdparty/libwebp/utils/bit_reader_utils.h
+++ b/thirdparty/libwebp/src/utils/bit_reader_utils.h
@@ -12,14 +12,14 @@
 // Author: Skal (pascal.massimino@gmail.com)
 //         Vikas Arora (vikaas.arora@gmail.com)
 
-#ifndef WEBP_UTILS_BIT_READER_H_
-#define WEBP_UTILS_BIT_READER_H_
+#ifndef WEBP_UTILS_BIT_READER_UTILS_H_
+#define WEBP_UTILS_BIT_READER_UTILS_H_
 
 #include <assert.h>
 #ifdef _MSC_VER
 #include <stdlib.h>  // _byteswap_ulong
 #endif
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -165,9 +165,10 @@ static WEBP_INLINE int VP8LIsEndOfStream(const VP8LBitReader* const br) {
 
 // For jumping over a number of bits in the bit stream when accessed with
 // VP8LPrefetchBits and VP8LFillBitWindow.
+// This function does *not* set br->eos_, since it's speed-critical.
+// Use with extreme care!
 static WEBP_INLINE void VP8LSetBitPos(VP8LBitReader* const br, int val) {
   br->bit_pos_ = val;
-  br->eos_ = VP8LIsEndOfStream(br);
 }
 
 // Advances the read buffer by 4 bytes to make room for reading next 32 bits.
@@ -181,4 +182,4 @@ static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) {
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_UTILS_BIT_READER_H_ */
+#endif  /* WEBP_UTILS_BIT_READER_UTILS_H_ */
diff --git a/thirdparty/libwebp/utils/bit_writer_utils.c b/thirdparty/libwebp/src/utils/bit_writer_utils.c
index ab0c49dce8..f4f476ce3f 100644
--- a/thirdparty/libwebp/utils/bit_writer_utils.c
+++ b/thirdparty/libwebp/src/utils/bit_writer_utils.c
@@ -16,9 +16,9 @@
 #include <string.h>   // for memcpy()
 #include <stdlib.h>
 
-#include "./bit_writer_utils.h"
-#include "./endian_inl_utils.h"
-#include "./utils.h"
+#include "src/utils/bit_writer_utils.h"
+#include "src/utils/endian_inl_utils.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // VP8BitWriter
@@ -239,6 +239,24 @@ int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size) {
   return VP8LBitWriterResize(bw, expected_size);
 }
 
+// Copies into 'dst' the bytes written so far in 'src' (buf_ up to cur_), as
+// well as its pending bits (bits_, used_) and its error flag.
+// Returns 0 if 'dst' could not be resized to hold that data, 1 otherwise.
+int VP8LBitWriterClone(const VP8LBitWriter* const src,
+                       VP8LBitWriter* const dst) {
+  const size_t current_size = src->cur_ - src->buf_;
+  assert(src->cur_ >= src->buf_ && src->cur_ <= src->end_);
+  if (!VP8LBitWriterResize(dst, current_size)) return 0;
+  memcpy(dst->buf_, src->buf_, current_size);
+  dst->bits_ = src->bits_;
+  dst->used_ = src->used_;
+  dst->error_ = src->error_;
+  // Place the write cursor right after the copied bytes, so that the clone
+  // continues writing (and reports its size) from the same offset as 'src'.
+  dst->cur_ = dst->buf_ + current_size;
+  return 1;
+}
+
 void VP8LBitWriterWipeOut(VP8LBitWriter* const bw) {
   if (bw != NULL) {
     WebPSafeFree(bw->buf_);
@@ -246,6 +258,21 @@ void VP8LBitWriterWipeOut(VP8LBitWriter* const bw) {
   }
 }
 
+void VP8LBitWriterReset(const VP8LBitWriter* const bw_init,
+                        VP8LBitWriter* const bw) {
+  bw->bits_ = bw_init->bits_;
+  bw->used_ = bw_init->used_;
+  bw->cur_ = bw->buf_ + (bw_init->cur_ - bw_init->buf_);
+  assert(bw->cur_ <= bw->end_);
+  bw->error_ = bw_init->error_;
+}
+
+void VP8LBitWriterSwap(VP8LBitWriter* const src, VP8LBitWriter* const dst) {
+  const VP8LBitWriter tmp = *src;
+  *src = *dst;
+  *dst = tmp;
+}
+
 void VP8LPutBitsFlushBits(VP8LBitWriter* const bw) {
   // If needed, make some room by flushing some bits out.
   if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
diff --git a/thirdparty/libwebp/utils/bit_writer_utils.h b/thirdparty/libwebp/src/utils/bit_writer_utils.h
index 9c02bbc06d..2cf5976fe3 100644
--- a/thirdparty/libwebp/utils/bit_writer_utils.h
+++ b/thirdparty/libwebp/src/utils/bit_writer_utils.h
@@ -11,10 +11,10 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_UTILS_BIT_WRITER_H_
-#define WEBP_UTILS_BIT_WRITER_H_
+#ifndef WEBP_UTILS_BIT_WRITER_UTILS_H_
+#define WEBP_UTILS_BIT_WRITER_UTILS_H_
 
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -100,16 +100,24 @@ typedef struct {
   int error_;
 } VP8LBitWriter;
 
-static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) {
+static WEBP_INLINE size_t VP8LBitWriterNumBytes(const VP8LBitWriter* const bw) {
   return (bw->cur_ - bw->buf_) + ((bw->used_ + 7) >> 3);
 }
 
 // Returns false in case of memory allocation error.
 int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size);
+// Returns false in case of memory allocation error.
+int VP8LBitWriterClone(const VP8LBitWriter* const src,
+                       VP8LBitWriter* const dst);
 // Finalize the bitstream coding. Returns a pointer to the internal buffer.
 uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw);
 // Release any pending memory and zeroes the object.
 void VP8LBitWriterWipeOut(VP8LBitWriter* const bw);
+// Resets the state of 'bw' (cursor, pending bits) to what it was in 'bw_init'.
+void VP8LBitWriterReset(const VP8LBitWriter* const bw_init,
+                        VP8LBitWriter* const bw);
+// Swaps the memory held by two BitWriters.
+void VP8LBitWriterSwap(VP8LBitWriter* const src, VP8LBitWriter* const dst);
 
 // Internal function for VP8LPutBits flushing 32 bits from the written state.
 void VP8LPutBitsFlushBits(VP8LBitWriter* const bw);
@@ -143,4 +151,4 @@ static WEBP_INLINE void VP8LPutBits(VP8LBitWriter* const bw,
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_UTILS_BIT_WRITER_H_ */
+#endif  /* WEBP_UTILS_BIT_WRITER_UTILS_H_ */
diff --git a/thirdparty/libwebp/utils/color_cache_utils.c b/thirdparty/libwebp/src/utils/color_cache_utils.c
index 0172590c48..b09f538e8b 100644
--- a/thirdparty/libwebp/utils/color_cache_utils.c
+++ b/thirdparty/libwebp/src/utils/color_cache_utils.c
@@ -14,8 +14,8 @@
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
-#include "./color_cache_utils.h"
-#include "./utils.h"
+#include "src/utils/color_cache_utils.h"
+#include "src/utils/utils.h"
 
 //------------------------------------------------------------------------------
 // VP8LColorCache.
diff --git a/thirdparty/libwebp/utils/color_cache_utils.h b/thirdparty/libwebp/src/utils/color_cache_utils.h
index c373e6b361..20b7be11c9 100644
--- a/thirdparty/libwebp/utils/color_cache_utils.h
+++ b/thirdparty/libwebp/src/utils/color_cache_utils.h
@@ -12,10 +12,12 @@
 // Authors: Jyrki Alakuijala (jyrki@google.com)
 //          Urvang Joshi (urvang@google.com)
 
-#ifndef WEBP_UTILS_COLOR_CACHE_H_
-#define WEBP_UTILS_COLOR_CACHE_H_
+#ifndef WEBP_UTILS_COLOR_CACHE_UTILS_H_
+#define WEBP_UTILS_COLOR_CACHE_UTILS_H_
 
-#include "../webp/types.h"
+#include <assert.h>
+
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -30,7 +32,7 @@ typedef struct {
 
 static const uint64_t kHashMul = 0x1e35a7bdull;
 
-static WEBP_INLINE int HashPix(uint32_t argb, int shift) {
+static WEBP_INLINE int VP8LHashPix(uint32_t argb, int shift) {
   return (int)(((argb * kHashMul) & 0xffffffffu) >> shift);
 }
 
@@ -48,19 +50,19 @@ static WEBP_INLINE void VP8LColorCacheSet(const VP8LColorCache* const cc,
 
 static WEBP_INLINE void VP8LColorCacheInsert(const VP8LColorCache* const cc,
                                              uint32_t argb) {
-  const int key = HashPix(argb, cc->hash_shift_);
+  const int key = VP8LHashPix(argb, cc->hash_shift_);
   cc->colors_[key] = argb;
 }
 
 static WEBP_INLINE int VP8LColorCacheGetIndex(const VP8LColorCache* const cc,
                                               uint32_t argb) {
-  return HashPix(argb, cc->hash_shift_);
+  return VP8LHashPix(argb, cc->hash_shift_);
 }
 
 // Return the key if cc contains argb, and -1 otherwise.
 static WEBP_INLINE int VP8LColorCacheContains(const VP8LColorCache* const cc,
                                               uint32_t argb) {
-  const int key = HashPix(argb, cc->hash_shift_);
+  const int key = VP8LHashPix(argb, cc->hash_shift_);
   return (cc->colors_[key] == argb) ? key : -1;
 }
 
@@ -82,4 +84,4 @@ void VP8LColorCacheClear(VP8LColorCache* const color_cache);
 }
 #endif
 
-#endif  // WEBP_UTILS_COLOR_CACHE_H_
+#endif  // WEBP_UTILS_COLOR_CACHE_UTILS_H_
diff --git a/thirdparty/libwebp/utils/endian_inl_utils.h b/thirdparty/libwebp/src/utils/endian_inl_utils.h
index e11260ff7d..4b2f91dfb8 100644
--- a/thirdparty/libwebp/utils/endian_inl_utils.h
+++ b/thirdparty/libwebp/src/utils/endian_inl_utils.h
@@ -9,15 +9,15 @@
 //
 // Endian related functions.
 
-#ifndef WEBP_UTILS_ENDIAN_INL_H_
-#define WEBP_UTILS_ENDIAN_INL_H_
+#ifndef WEBP_UTILS_ENDIAN_INL_UTILS_H_
+#define WEBP_UTILS_ENDIAN_INL_UTILS_H_
 
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
-#include "../dsp/dsp.h"
-#include "../webp/types.h"
+#include "src/dsp/dsp.h"
+#include "src/webp/types.h"
 
 // some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
 #if !defined(WORDS_BIGENDIAN) && \
@@ -97,4 +97,4 @@ static WEBP_INLINE uint64_t BSwap64(uint64_t x) {
 #endif  // HAVE_BUILTIN_BSWAP64
 }
 
-#endif  // WEBP_UTILS_ENDIAN_INL_H_
+#endif  // WEBP_UTILS_ENDIAN_INL_UTILS_H_
diff --git a/thirdparty/libwebp/utils/filters_utils.c b/thirdparty/libwebp/src/utils/filters_utils.c
index 49c1d18a22..bbc2c34d93 100644
--- a/thirdparty/libwebp/utils/filters_utils.c
+++ b/thirdparty/libwebp/src/utils/filters_utils.c
@@ -11,7 +11,7 @@
 //
 // Author: Urvang (urvang@google.com)
 
-#include "./filters_utils.h"
+#include "src/utils/filters_utils.h"
 #include <stdlib.h>
 #include <string.h>
 
diff --git a/thirdparty/libwebp/utils/filters_utils.h b/thirdparty/libwebp/src/utils/filters_utils.h
index 088b132fc5..410f2fcdf2 100644
--- a/thirdparty/libwebp/utils/filters_utils.h
+++ b/thirdparty/libwebp/src/utils/filters_utils.h
@@ -11,11 +11,11 @@
 //
 // Author: Urvang (urvang@google.com)
 
-#ifndef WEBP_UTILS_FILTERS_H_
-#define WEBP_UTILS_FILTERS_H_
+#ifndef WEBP_UTILS_FILTERS_UTILS_H_
+#define WEBP_UTILS_FILTERS_UTILS_H_
 
-#include "../webp/types.h"
-#include "../dsp/dsp.h"
+#include "src/webp/types.h"
+#include "src/dsp/dsp.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -29,4 +29,4 @@ WEBP_FILTER_TYPE WebPEstimateBestFilter(const uint8_t* data,
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_UTILS_FILTERS_H_ */
+#endif  /* WEBP_UTILS_FILTERS_UTILS_H_ */
diff --git a/thirdparty/libwebp/utils/huffman_encode_utils.c b/thirdparty/libwebp/src/utils/huffman_encode_utils.c
index f9504658ea..6f3b1bbe02 100644
--- a/thirdparty/libwebp/utils/huffman_encode_utils.c
+++ b/thirdparty/libwebp/src/utils/huffman_encode_utils.c
@@ -14,9 +14,9 @@
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
-#include "./huffman_encode_utils.h"
-#include "./utils.h"
-#include "../webp/format_constants.h"
+#include "src/utils/huffman_encode_utils.h"
+#include "src/utils/utils.h"
+#include "src/webp/format_constants.h"
 
 // -----------------------------------------------------------------------------
 // Util function to optimize the symbol map for RLE coding
diff --git a/thirdparty/libwebp/utils/huffman_encode_utils.h b/thirdparty/libwebp/src/utils/huffman_encode_utils.h
index a157165148..3e6763ce49 100644
--- a/thirdparty/libwebp/utils/huffman_encode_utils.h
+++ b/thirdparty/libwebp/src/utils/huffman_encode_utils.h
@@ -11,10 +11,10 @@
 //
 // Entropy encoding (Huffman) for webp lossless
 
-#ifndef WEBP_UTILS_HUFFMAN_ENCODE_H_
-#define WEBP_UTILS_HUFFMAN_ENCODE_H_
+#ifndef WEBP_UTILS_HUFFMAN_ENCODE_UTILS_H_
+#define WEBP_UTILS_HUFFMAN_ENCODE_UTILS_H_
 
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -57,4 +57,4 @@ void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
 }
 #endif
 
-#endif  // WEBP_UTILS_HUFFMAN_ENCODE_H_
+#endif  // WEBP_UTILS_HUFFMAN_ENCODE_UTILS_H_
diff --git a/thirdparty/libwebp/utils/huffman_utils.c b/thirdparty/libwebp/src/utils/huffman_utils.c
index 008b5d746f..7a69963c3e 100644
--- a/thirdparty/libwebp/utils/huffman_utils.c
+++ b/thirdparty/libwebp/src/utils/huffman_utils.c
@@ -14,9 +14,9 @@
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
-#include "./huffman_utils.h"
-#include "./utils.h"
-#include "../webp/format_constants.h"
+#include "src/utils/huffman_utils.h"
+#include "src/utils/utils.h"
+#include "src/webp/format_constants.h"
 
 // Huffman data read via DecodeImageStream is represented in two (red and green)
 // bytes.
diff --git a/thirdparty/libwebp/utils/huffman_utils.h b/thirdparty/libwebp/src/utils/huffman_utils.h
index c6dd6aaa45..ff7ef17f3b 100644
--- a/thirdparty/libwebp/utils/huffman_utils.h
+++ b/thirdparty/libwebp/src/utils/huffman_utils.h
@@ -11,12 +11,12 @@
 //
 // Author: Urvang Joshi (urvang@google.com)
 
-#ifndef WEBP_UTILS_HUFFMAN_H_
-#define WEBP_UTILS_HUFFMAN_H_
+#ifndef WEBP_UTILS_HUFFMAN_UTILS_H_
+#define WEBP_UTILS_HUFFMAN_UTILS_H_
 
 #include <assert.h>
-#include "../webp/format_constants.h"
-#include "../webp/types.h"
+#include "src/webp/format_constants.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -85,4 +85,4 @@ int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
 }    // extern "C"
 #endif
 
-#endif  // WEBP_UTILS_HUFFMAN_H_
+#endif  // WEBP_UTILS_HUFFMAN_UTILS_H_
diff --git a/thirdparty/libwebp/utils/quant_levels_dec_utils.c b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c
index d4d23d3147..3818a78b93 100644
--- a/thirdparty/libwebp/utils/quant_levels_dec_utils.c
+++ b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c
@@ -14,11 +14,11 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./quant_levels_dec_utils.h"
+#include "src/utils/quant_levels_dec_utils.h"
 
 #include <string.h>   // for memset
 
-#include "./utils.h"
+#include "src/utils/utils.h"
 
 // #define USE_DITHERING   // uncomment to enable ordered dithering (not vital)
 
@@ -71,10 +71,11 @@ typedef struct {
 
 //------------------------------------------------------------------------------
 
-#define CLIP_MASK (int)(~0U << (8 + DFIX))
+#define CLIP_8b_MASK (int)(~0U << (8 + DFIX))
 static WEBP_INLINE uint8_t clip_8b(int v) {
-  return (!(v & CLIP_MASK)) ? (uint8_t)(v >> DFIX) : (v < 0) ? 0u : 255u;
+  return (!(v & CLIP_8b_MASK)) ? (uint8_t)(v >> DFIX) : (v < 0) ? 0u : 255u;
 }
+#undef CLIP_8b_MASK
 
 // vertical accumulation
 static void VFilter(SmoothParams* const p) {
diff --git a/thirdparty/libwebp/utils/quant_levels_dec_utils.h b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.h
index 59a13495d3..f822107a72 100644
--- a/thirdparty/libwebp/utils/quant_levels_dec_utils.h
+++ b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.h
@@ -11,10 +11,10 @@
 //
 // Author:  Vikas Arora (vikasa@google.com)
 
-#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_H_
-#define WEBP_UTILS_QUANT_LEVELS_DEC_H_
+#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_UTILS_H_
+#define WEBP_UTILS_QUANT_LEVELS_DEC_UTILS_H_
 
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -32,4 +32,4 @@ int WebPDequantizeLevels(uint8_t* const data, int width, int height, int stride,
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_UTILS_QUANT_LEVELS_DEC_H_ */
+#endif  /* WEBP_UTILS_QUANT_LEVELS_DEC_UTILS_H_ */
diff --git a/thirdparty/libwebp/utils/quant_levels_utils.c b/thirdparty/libwebp/src/utils/quant_levels_utils.c
index 73174e8ab9..d65ad3c29d 100644
--- a/thirdparty/libwebp/utils/quant_levels_utils.c
+++ b/thirdparty/libwebp/src/utils/quant_levels_utils.c
@@ -14,7 +14,7 @@
 
 #include <assert.h>
 
-#include "./quant_levels_utils.h"
+#include "src/utils/quant_levels_utils.h"
 
 #define NUM_SYMBOLS     256
 
diff --git a/thirdparty/libwebp/utils/quant_levels_utils.h b/thirdparty/libwebp/src/utils/quant_levels_utils.h
index 1cb5a32cae..75df2ba6a4 100644
--- a/thirdparty/libwebp/utils/quant_levels_utils.h
+++ b/thirdparty/libwebp/src/utils/quant_levels_utils.h
@@ -11,12 +11,12 @@
 //
 // Author:  Vikas Arora (vikasa@google.com)
 
-#ifndef WEBP_UTILS_QUANT_LEVELS_H_
-#define WEBP_UTILS_QUANT_LEVELS_H_
+#ifndef WEBP_UTILS_QUANT_LEVELS_UTILS_H_
+#define WEBP_UTILS_QUANT_LEVELS_UTILS_H_
 
 #include <stdlib.h>
 
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -33,4 +33,4 @@ int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_UTILS_QUANT_LEVELS_H_ */
+#endif  /* WEBP_UTILS_QUANT_LEVELS_UTILS_H_ */
diff --git a/thirdparty/libwebp/utils/random_utils.c b/thirdparty/libwebp/src/utils/random_utils.c
index 9f1e4154a6..7edb3fefbb 100644
--- a/thirdparty/libwebp/utils/random_utils.c
+++ b/thirdparty/libwebp/src/utils/random_utils.c
@@ -12,7 +12,7 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <string.h>
-#include "./random_utils.h"
+#include "src/utils/random_utils.h"
 
 //------------------------------------------------------------------------------
 
diff --git a/thirdparty/libwebp/utils/random_utils.h b/thirdparty/libwebp/src/utils/random_utils.h
index c392a615ca..6d36c667e7 100644
--- a/thirdparty/libwebp/utils/random_utils.h
+++ b/thirdparty/libwebp/src/utils/random_utils.h
@@ -11,11 +11,11 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_UTILS_RANDOM_H_
-#define WEBP_UTILS_RANDOM_H_
+#ifndef WEBP_UTILS_RANDOM_UTILS_H_
+#define WEBP_UTILS_RANDOM_UTILS_H_
 
 #include <assert.h>
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -60,4 +60,4 @@ static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_UTILS_RANDOM_H_ */
+#endif  /* WEBP_UTILS_RANDOM_UTILS_H_ */
diff --git a/thirdparty/libwebp/utils/rescaler_utils.c b/thirdparty/libwebp/src/utils/rescaler_utils.c
index 0d1f80da24..90e2ea76a1 100644
--- a/thirdparty/libwebp/utils/rescaler_utils.c
+++ b/thirdparty/libwebp/src/utils/rescaler_utils.c
@@ -14,8 +14,8 @@
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
-#include "../dsp/dsp.h"
-#include "./rescaler_utils.h"
+#include "src/dsp/dsp.h"
+#include "src/utils/rescaler_utils.h"
 
 //------------------------------------------------------------------------------
 
@@ -85,11 +85,13 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height,
 
     // if width is unspecified, scale original proportionally to height ratio.
     if (width == 0) {
-      width = (src_width * height + src_height / 2) / src_height;
+      width =
+          (int)(((uint64_t)src_width * height + src_height / 2) / src_height);
     }
     // if height is unspecified, scale original proportionally to width ratio.
     if (height == 0) {
-      height = (src_height * width + src_width / 2) / src_width;
+      height =
+          (int)(((uint64_t)src_height * width + src_width / 2) / src_width);
     }
     // Check if the overall dimensions still make sense.
     if (width <= 0 || height <= 0) {
diff --git a/thirdparty/libwebp/utils/rescaler_utils.h b/thirdparty/libwebp/src/utils/rescaler_utils.h
index 98b01a76d0..8890e6fa13 100644
--- a/thirdparty/libwebp/utils/rescaler_utils.h
+++ b/thirdparty/libwebp/src/utils/rescaler_utils.h
@@ -11,14 +11,14 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_UTILS_RESCALER_H_
-#define WEBP_UTILS_RESCALER_H_
+#ifndef WEBP_UTILS_RESCALER_UTILS_H_
+#define WEBP_UTILS_RESCALER_UTILS_H_
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
 #define WEBP_RESCALER_RFIX 32   // fixed-point precision for multiplies
 #define WEBP_RESCALER_ONE (1ull << WEBP_RESCALER_RFIX)
@@ -98,4 +98,4 @@ int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_UTILS_RESCALER_H_ */
+#endif  /* WEBP_UTILS_RESCALER_UTILS_H_ */
diff --git a/thirdparty/libwebp/utils/thread_utils.c b/thirdparty/libwebp/src/utils/thread_utils.c
index 1729060c70..2052b6b006 100644
--- a/thirdparty/libwebp/utils/thread_utils.c
+++ b/thirdparty/libwebp/src/utils/thread_utils.c
@@ -13,8 +13,8 @@
 
 #include <assert.h>
 #include <string.h>   // for memset()
-#include "./thread_utils.h"
-#include "./utils.h"
+#include "src/utils/thread_utils.h"
+#include "src/utils/utils.h"
 
 #ifdef WEBP_USE_THREAD
 
@@ -50,11 +50,11 @@ typedef struct {
 
 #endif  // _WIN32
 
-struct WebPWorkerImpl {
+typedef struct {
   pthread_mutex_t mutex_;
   pthread_cond_t  condition_;
   pthread_t       thread_;
-};
+} WebPWorkerImpl;
 
 #if defined(_WIN32)
 
@@ -201,25 +201,24 @@ static int pthread_cond_wait(pthread_cond_t* const condition,
 
 //------------------------------------------------------------------------------
 
-static void Execute(WebPWorker* const worker);  // Forward declaration.
-
 static THREADFN ThreadLoop(void* ptr) {
   WebPWorker* const worker = (WebPWorker*)ptr;
+  WebPWorkerImpl* const impl = (WebPWorkerImpl*)worker->impl_;
   int done = 0;
   while (!done) {
-    pthread_mutex_lock(&worker->impl_->mutex_);
+    pthread_mutex_lock(&impl->mutex_);
     while (worker->status_ == OK) {   // wait in idling mode
-      pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+      pthread_cond_wait(&impl->condition_, &impl->mutex_);
     }
     if (worker->status_ == WORK) {
-      Execute(worker);
+      WebPGetWorkerInterface()->Execute(worker);
       worker->status_ = OK;
     } else if (worker->status_ == NOT_OK) {   // finish the worker
       done = 1;
     }
     // signal to the main thread that we're done (for Sync())
-    pthread_cond_signal(&worker->impl_->condition_);
-    pthread_mutex_unlock(&worker->impl_->mutex_);
+    pthread_cond_signal(&impl->condition_);
+    pthread_mutex_unlock(&impl->mutex_);
   }
   return THREAD_RETURN(NULL);    // Thread is finished
 }
@@ -229,21 +228,22 @@ static void ChangeState(WebPWorker* const worker, WebPWorkerStatus new_status) {
   // No-op when attempting to change state on a thread that didn't come up.
   // Checking status_ without acquiring the lock first would result in a data
   // race.
-  if (worker->impl_ == NULL) return;
+  WebPWorkerImpl* const impl = (WebPWorkerImpl*)worker->impl_;
+  if (impl == NULL) return;
 
-  pthread_mutex_lock(&worker->impl_->mutex_);
+  pthread_mutex_lock(&impl->mutex_);
   if (worker->status_ >= OK) {
     // wait for the worker to finish
     while (worker->status_ != OK) {
-      pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+      pthread_cond_wait(&impl->condition_, &impl->mutex_);
     }
     // assign new status and release the working thread if needed
     if (new_status != OK) {
       worker->status_ = new_status;
-      pthread_cond_signal(&worker->impl_->condition_);
+      pthread_cond_signal(&impl->condition_);
     }
   }
-  pthread_mutex_unlock(&worker->impl_->mutex_);
+  pthread_mutex_unlock(&impl->mutex_);
 }
 
 #endif  // WEBP_USE_THREAD
@@ -268,26 +268,28 @@ static int Reset(WebPWorker* const worker) {
   worker->had_error = 0;
   if (worker->status_ < OK) {
 #ifdef WEBP_USE_THREAD
-    worker->impl_ = (WebPWorkerImpl*)WebPSafeCalloc(1, sizeof(*worker->impl_));
+    WebPWorkerImpl* const impl =
+        (WebPWorkerImpl*)WebPSafeCalloc(1, sizeof(WebPWorkerImpl));
+    worker->impl_ = (void*)impl;
     if (worker->impl_ == NULL) {
       return 0;
     }
-    if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
+    if (pthread_mutex_init(&impl->mutex_, NULL)) {
       goto Error;
     }
-    if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
-      pthread_mutex_destroy(&worker->impl_->mutex_);
+    if (pthread_cond_init(&impl->condition_, NULL)) {
+      pthread_mutex_destroy(&impl->mutex_);
       goto Error;
     }
-    pthread_mutex_lock(&worker->impl_->mutex_);
-    ok = !pthread_create(&worker->impl_->thread_, NULL, ThreadLoop, worker);
+    pthread_mutex_lock(&impl->mutex_);
+    ok = !pthread_create(&impl->thread_, NULL, ThreadLoop, worker);
     if (ok) worker->status_ = OK;
-    pthread_mutex_unlock(&worker->impl_->mutex_);
+    pthread_mutex_unlock(&impl->mutex_);
     if (!ok) {
-      pthread_mutex_destroy(&worker->impl_->mutex_);
-      pthread_cond_destroy(&worker->impl_->condition_);
+      pthread_mutex_destroy(&impl->mutex_);
+      pthread_cond_destroy(&impl->condition_);
  Error:
-      WebPSafeFree(worker->impl_);
+      WebPSafeFree(impl);
       worker->impl_ = NULL;
       return 0;
     }
@@ -318,11 +320,12 @@ static void Launch(WebPWorker* const worker) {
 static void End(WebPWorker* const worker) {
 #ifdef WEBP_USE_THREAD
   if (worker->impl_ != NULL) {
+    WebPWorkerImpl* const impl = (WebPWorkerImpl*)worker->impl_;
     ChangeState(worker, NOT_OK);
-    pthread_join(worker->impl_->thread_, NULL);
-    pthread_mutex_destroy(&worker->impl_->mutex_);
-    pthread_cond_destroy(&worker->impl_->condition_);
-    WebPSafeFree(worker->impl_);
+    pthread_join(impl->thread_, NULL);
+    pthread_mutex_destroy(&impl->mutex_);
+    pthread_cond_destroy(&impl->condition_);
+    WebPSafeFree(impl);
     worker->impl_ = NULL;
   }
 #else
diff --git a/thirdparty/libwebp/utils/thread_utils.h b/thirdparty/libwebp/src/utils/thread_utils.h
index 8408311855..c8ae6c9033 100644
--- a/thirdparty/libwebp/utils/thread_utils.h
+++ b/thirdparty/libwebp/src/utils/thread_utils.h
@@ -11,14 +11,14 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifndef WEBP_UTILS_THREAD_H_
-#define WEBP_UTILS_THREAD_H_
+#ifndef WEBP_UTILS_THREAD_UTILS_H_
+#define WEBP_UTILS_THREAD_UTILS_H_
 
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
-#include "../webp/types.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -35,12 +35,9 @@ typedef enum {
 // arguments (data1 and data2), and should return false in case of error.
 typedef int (*WebPWorkerHook)(void*, void*);
 
-// Platform-dependent implementation details for the worker.
-typedef struct WebPWorkerImpl WebPWorkerImpl;
-
 // Synchronization object used to launch job in the worker thread
 typedef struct {
-  WebPWorkerImpl* impl_;
+  void* impl_;            // platform-dependent implementation worker details
   WebPWorkerStatus status_;
   WebPWorkerHook hook;    // hook to call
   void* data1;            // first argument passed to 'hook'
@@ -78,11 +75,11 @@ typedef struct {
 // decoding takes place. The contents of the interface struct are copied, it
 // is safe to free the corresponding memory after this call. This function is
 // not thread-safe. Return false in case of invalid pointer or methods.
-WEBP_EXTERN(int) WebPSetWorkerInterface(
+WEBP_EXTERN int WebPSetWorkerInterface(
     const WebPWorkerInterface* const winterface);
 
 // Retrieve the currently set thread worker interface.
-WEBP_EXTERN(const WebPWorkerInterface*) WebPGetWorkerInterface(void);
+WEBP_EXTERN const WebPWorkerInterface* WebPGetWorkerInterface(void);
 
 //------------------------------------------------------------------------------
 
@@ -90,4 +87,4 @@ WEBP_EXTERN(const WebPWorkerInterface*) WebPGetWorkerInterface(void);
 }    // extern "C"
 #endif
 
-#endif  /* WEBP_UTILS_THREAD_H_ */
+#endif  /* WEBP_UTILS_THREAD_UTILS_H_ */
diff --git a/thirdparty/libwebp/utils/utils.c b/thirdparty/libwebp/src/utils/utils.c
index 504d924b60..44d5c14f01 100644
--- a/thirdparty/libwebp/utils/utils.c
+++ b/thirdparty/libwebp/src/utils/utils.c
@@ -13,10 +13,11 @@
 
 #include <stdlib.h>
 #include <string.h>  // for memcpy()
-#include "../webp/decode.h"
-#include "../webp/encode.h"
-#include "../webp/format_constants.h"  // for MAX_PALETTE_SIZE
-#include "./utils.h"
+#include "src/webp/decode.h"
+#include "src/webp/encode.h"
+#include "src/webp/format_constants.h"  // for MAX_PALETTE_SIZE
+#include "src/utils/color_cache_utils.h"
+#include "src/utils/utils.h"
 
 // If PRINT_MEM_INFO is defined, extra info (like total memory used, number of
 // alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
@@ -252,7 +253,6 @@ int WebPGetColorPalette(const WebPPicture* const pic, uint32_t* const palette) {
   int num_colors = 0;
   uint8_t in_use[COLOR_HASH_SIZE] = { 0 };
   uint32_t colors[COLOR_HASH_SIZE];
-  static const uint64_t kHashMul = 0x1e35a7bdull;
   const uint32_t* argb = pic->argb;
   const int width = pic->width;
   const int height = pic->height;
@@ -267,7 +267,7 @@ int WebPGetColorPalette(const WebPPicture* const pic, uint32_t* const palette) {
         continue;
       }
       last_pix = argb[x];
-      key = ((last_pix * kHashMul) & 0xffffffffu) >> COLOR_HASH_RIGHT_SHIFT;
+      key = VP8LHashPix(last_pix, COLOR_HASH_RIGHT_SHIFT);
       while (1) {
         if (!in_use[key]) {
           colors[key] = last_pix;
diff --git a/thirdparty/libwebp/utils/utils.h b/thirdparty/libwebp/src/utils/utils.h
index 3ab459050a..52921bf24e 100644
--- a/thirdparty/libwebp/utils/utils.h
+++ b/thirdparty/libwebp/src/utils/utils.h
@@ -16,14 +16,14 @@
 #define WEBP_UTILS_UTILS_H_
 
 #ifdef HAVE_CONFIG_H
-#include "../webp/config.h"
+#include "src/webp/config.h"
 #endif
 
 #include <assert.h>
 #include <limits.h>
 
-#include "../dsp/dsp.h"
-#include "../webp/types.h"
+#include "src/dsp/dsp.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -48,13 +48,13 @@ extern "C" {
 // somewhere (like: malloc(num_pixels * sizeof(*something))). That's why this
 // safe malloc() borrows the signature from calloc(), pointing at the dangerous
 // underlying multiply involved.
-WEBP_EXTERN(void*) WebPSafeMalloc(uint64_t nmemb, size_t size);
+WEBP_EXTERN void* WebPSafeMalloc(uint64_t nmemb, size_t size);
 // Note that WebPSafeCalloc() expects the second argument type to be 'size_t'
 // in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
-WEBP_EXTERN(void*) WebPSafeCalloc(uint64_t nmemb, size_t size);
+WEBP_EXTERN void* WebPSafeCalloc(uint64_t nmemb, size_t size);
 
 // Companion deallocation function to the above allocations.
-WEBP_EXTERN(void) WebPSafeFree(void* const ptr);
+WEBP_EXTERN void WebPSafeFree(void* const ptr);
 
 //------------------------------------------------------------------------------
 // Alignment
@@ -66,7 +66,7 @@ WEBP_EXTERN(void) WebPSafeFree(void* const ptr);
 // memcpy() is the safe way of moving potentially unaligned 32b memory.
 static WEBP_INLINE uint32_t WebPMemToUint32(const uint8_t* const ptr) {
   uint32_t A;
-  memcpy(&A, (const int*)ptr, sizeof(A));
+  memcpy(&A, ptr, sizeof(A));
   return A;
 }
 static WEBP_INLINE void WebPUint32ToMem(uint8_t* const ptr, uint32_t val) {
@@ -112,12 +112,12 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
 #define WEBP_NEED_LOG_TABLE_8BIT
 extern const uint8_t WebPLogTable8bit[256];
 static WEBP_INLINE int WebPLog2FloorC(uint32_t n) {
-  int log = 0;
+  int log_value = 0;
   while (n >= 256) {
-    log += 8;
+    log_value += 8;
     n >>= 8;
   }
-  return log + WebPLogTable8bit[n];
+  return log_value + WebPLogTable8bit[n];
 }
 
 // Returns (int)floor(log2(n)). n must be > 0.
@@ -147,14 +147,14 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return WebPLog2FloorC(n); }
 struct WebPPicture;
 
 // Copy width x height pixels from 'src' to 'dst' honoring the strides.
-WEBP_EXTERN(void) WebPCopyPlane(const uint8_t* src, int src_stride,
-                                uint8_t* dst, int dst_stride,
-                                int width, int height);
+WEBP_EXTERN void WebPCopyPlane(const uint8_t* src, int src_stride,
+                               uint8_t* dst, int dst_stride,
+                               int width, int height);
 
 // Copy ARGB pixels from 'src' to 'dst' honoring strides. 'src' and 'dst' are
 // assumed to be already allocated and using ARGB data.
-WEBP_EXTERN(void) WebPCopyPixels(const struct WebPPicture* const src,
-                                 struct WebPPicture* const dst);
+WEBP_EXTERN void WebPCopyPixels(const struct WebPPicture* const src,
+                                struct WebPPicture* const dst);
 
 //------------------------------------------------------------------------------
 // Unique colors.
@@ -166,8 +166,8 @@ WEBP_EXTERN(void) WebPCopyPixels(const struct WebPPicture* const src,
 // MAX_PALETTE_SIZE, also outputs the actual unique colors into 'palette'.
 // Note: 'palette' is assumed to be an array already allocated with at least
 // MAX_PALETTE_SIZE elements.
-WEBP_EXTERN(int) WebPGetColorPalette(const struct WebPPicture* const pic,
-                                     uint32_t* const palette);
+WEBP_EXTERN int WebPGetColorPalette(const struct WebPPicture* const pic,
+                                    uint32_t* const palette);
 
 //------------------------------------------------------------------------------
 
diff --git a/thirdparty/libwebp/webp/decode.h b/thirdparty/libwebp/src/webp/decode.h
index 4c5e74ac36..2165e96c95 100644
--- a/thirdparty/libwebp/webp/decode.h
+++ b/thirdparty/libwebp/src/webp/decode.h
@@ -36,39 +36,39 @@ typedef struct WebPDecoderConfig WebPDecoderConfig;
 
 // Return the decoder's version number, packed in hexadecimal using 8bits for
 // each of major/minor/revision. E.g: v2.5.7 is 0x020507.
-WEBP_EXTERN(int) WebPGetDecoderVersion(void);
+WEBP_EXTERN int WebPGetDecoderVersion(void);
 
 // Retrieve basic header information: width, height.
 // This function will also validate the header, returning true on success,
 // false otherwise. '*width' and '*height' are only valid on successful return.
 // Pointers 'width' and 'height' can be passed NULL if deemed irrelevant.
-WEBP_EXTERN(int) WebPGetInfo(const uint8_t* data, size_t data_size,
-                             int* width, int* height);
+WEBP_EXTERN int WebPGetInfo(const uint8_t* data, size_t data_size,
+                            int* width, int* height);
 
 // Decodes WebP images pointed to by 'data' and returns RGBA samples, along
 // with the dimensions in *width and *height. The ordering of samples in
 // memory is R, G, B, A, R, G, B, A... in scan order (endian-independent).
 // The returned pointer should be deleted calling WebPFree().
 // Returns NULL in case of error.
-WEBP_EXTERN(uint8_t*) WebPDecodeRGBA(const uint8_t* data, size_t data_size,
-                                     int* width, int* height);
+WEBP_EXTERN uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size,
+                                    int* width, int* height);
 
 // Same as WebPDecodeRGBA, but returning A, R, G, B, A, R, G, B... ordered data.
-WEBP_EXTERN(uint8_t*) WebPDecodeARGB(const uint8_t* data, size_t data_size,
-                                     int* width, int* height);
+WEBP_EXTERN uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size,
+                                    int* width, int* height);
 
 // Same as WebPDecodeRGBA, but returning B, G, R, A, B, G, R, A... ordered data.
-WEBP_EXTERN(uint8_t*) WebPDecodeBGRA(const uint8_t* data, size_t data_size,
-                                     int* width, int* height);
+WEBP_EXTERN uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
+                                    int* width, int* height);
 
 // Same as WebPDecodeRGBA, but returning R, G, B, R, G, B... ordered data.
 // If the bitstream contains transparency, it is ignored.
-WEBP_EXTERN(uint8_t*) WebPDecodeRGB(const uint8_t* data, size_t data_size,
-                                    int* width, int* height);
+WEBP_EXTERN uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
+                                   int* width, int* height);
 
 // Same as WebPDecodeRGB, but returning B, G, R, B, G, R... ordered data.
-WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size,
-                                    int* width, int* height);
+WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
+                                   int* width, int* height);
 
 
 // Decode WebP images pointed to by 'data' to Y'UV format(*). The pointer
@@ -80,13 +80,13 @@ WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size,
 // have a common stride returned as '*uv_stride'.
 // Return NULL in case of error.
 // (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
-WEBP_EXTERN(uint8_t*) WebPDecodeYUV(const uint8_t* data, size_t data_size,
-                                    int* width, int* height,
-                                    uint8_t** u, uint8_t** v,
-                                    int* stride, int* uv_stride);
+WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
+                                   int* width, int* height,
+                                   uint8_t** u, uint8_t** v,
+                                   int* stride, int* uv_stride);
 
 // Releases memory returned by the WebPDecode*() functions above.
-WEBP_EXTERN(void) WebPFree(void* ptr);
+WEBP_EXTERN void WebPFree(void* ptr);
 
 // These five functions are variants of the above ones, that decode the image
 // directly into a pre-allocated buffer 'output_buffer'. The maximum storage
@@ -96,22 +96,22 @@ WEBP_EXTERN(void) WebPFree(void* ptr);
 // The parameter 'output_stride' specifies the distance (in bytes)
 // between scanlines. Hence, output_buffer_size is expected to be at least
 // output_stride x picture-height.
-WEBP_EXTERN(uint8_t*) WebPDecodeRGBAInto(
+WEBP_EXTERN uint8_t* WebPDecodeRGBAInto(
     const uint8_t* data, size_t data_size,
     uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
-WEBP_EXTERN(uint8_t*) WebPDecodeARGBInto(
+WEBP_EXTERN uint8_t* WebPDecodeARGBInto(
     const uint8_t* data, size_t data_size,
     uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
-WEBP_EXTERN(uint8_t*) WebPDecodeBGRAInto(
+WEBP_EXTERN uint8_t* WebPDecodeBGRAInto(
     const uint8_t* data, size_t data_size,
     uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
 
 // RGB and BGR variants. Here too the transparency information, if present,
 // will be dropped and ignored.
-WEBP_EXTERN(uint8_t*) WebPDecodeRGBInto(
+WEBP_EXTERN uint8_t* WebPDecodeRGBInto(
     const uint8_t* data, size_t data_size,
     uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
-WEBP_EXTERN(uint8_t*) WebPDecodeBGRInto(
+WEBP_EXTERN uint8_t* WebPDecodeBGRInto(
     const uint8_t* data, size_t data_size,
     uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
 
@@ -122,7 +122,7 @@ WEBP_EXTERN(uint8_t*) WebPDecodeBGRInto(
 // 'u_size' and 'v_size' respectively.
 // Pointer to the luma plane ('*luma') is returned or NULL if an error occurred
 // during decoding (or because some buffers were found to be too small).
-WEBP_EXTERN(uint8_t*) WebPDecodeYUVInto(
+WEBP_EXTERN uint8_t* WebPDecodeYUVInto(
     const uint8_t* data, size_t data_size,
     uint8_t* luma, size_t luma_size, int luma_stride,
     uint8_t* u, size_t u_size, int u_stride,
@@ -213,7 +213,7 @@ struct WebPDecBuffer {
 };
 
 // Internal, version-checked, entry point
-WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer*, int);
+WEBP_EXTERN int WebPInitDecBufferInternal(WebPDecBuffer*, int);
 
 // Initialize the structure as empty. Must be called before any other use.
 // Returns false in case of version mismatch
@@ -223,7 +223,7 @@ static WEBP_INLINE int WebPInitDecBuffer(WebPDecBuffer* buffer) {
 
 // Free any memory associated with the buffer. Must always be called last.
 // Note: doesn't free the 'buffer' structure itself.
-WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer);
+WEBP_EXTERN void WebPFreeDecBuffer(WebPDecBuffer* buffer);
 
 //------------------------------------------------------------------------------
 // Enumeration of the status codes
@@ -277,7 +277,7 @@ typedef enum VP8StatusCode {
 // within valid bounds.
 // All other fields of WebPDecBuffer MUST remain constant between calls.
 // Returns NULL if the allocation failed.
-WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
+WEBP_EXTERN WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer);
 
 // This function allocates and initializes an incremental-decoder object, which
 // will output the RGB/A samples specified by 'csp' into a preallocated
@@ -289,7 +289,7 @@ WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
 // colorspace 'csp' is taken into account for allocating this buffer. All other
 // parameters are ignored.
 // Returns NULL if the allocation failed, or if some parameters are invalid.
-WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
+WEBP_EXTERN WebPIDecoder* WebPINewRGB(
     WEBP_CSP_MODE csp,
     uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
 
@@ -304,7 +304,7 @@ WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
 // In this case, the output buffer will be automatically allocated (using
 // MODE_YUVA) when decoding starts. All parameters are then ignored.
 // Returns NULL if the allocation failed or if a parameter is invalid.
-WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
+WEBP_EXTERN WebPIDecoder* WebPINewYUVA(
     uint8_t* luma, size_t luma_size, int luma_stride,
     uint8_t* u, size_t u_size, int u_stride,
     uint8_t* v, size_t v_size, int v_stride,
@@ -312,19 +312,19 @@ WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
 
 // Deprecated version of the above, without the alpha plane.
 // Kept for backward compatibility.
-WEBP_EXTERN(WebPIDecoder*) WebPINewYUV(
+WEBP_EXTERN WebPIDecoder* WebPINewYUV(
     uint8_t* luma, size_t luma_size, int luma_stride,
     uint8_t* u, size_t u_size, int u_stride,
     uint8_t* v, size_t v_size, int v_stride);
 
 // Deletes the WebPIDecoder object and associated memory. Must always be called
 // if WebPINewDecoder, WebPINewRGB or WebPINewYUV succeeded.
-WEBP_EXTERN(void) WebPIDelete(WebPIDecoder* idec);
+WEBP_EXTERN void WebPIDelete(WebPIDecoder* idec);
 
 // Copies and decodes the next available data. Returns VP8_STATUS_OK when
 // the image is successfully decoded. Returns VP8_STATUS_SUSPENDED when more
 // data is expected. Returns error in other cases.
-WEBP_EXTERN(VP8StatusCode) WebPIAppend(
+WEBP_EXTERN VP8StatusCode WebPIAppend(
     WebPIDecoder* idec, const uint8_t* data, size_t data_size);
 
 // A variant of the above function to be used when data buffer contains
@@ -332,7 +332,7 @@ WEBP_EXTERN(VP8StatusCode) WebPIAppend(
 // to the internal memory.
 // Note that the value of the 'data' pointer can change between calls to
 // WebPIUpdate, for instance when the data buffer is resized to fit larger data.
-WEBP_EXTERN(VP8StatusCode) WebPIUpdate(
+WEBP_EXTERN VP8StatusCode WebPIUpdate(
     WebPIDecoder* idec, const uint8_t* data, size_t data_size);
 
 // Returns the RGB/A image decoded so far. Returns NULL if output params
@@ -340,15 +340,16 @@ WEBP_EXTERN(VP8StatusCode) WebPIUpdate(
 // specified during call to WebPINewDecoder() or WebPINewRGB().
 // *last_y is the index of last decoded row in raster scan order. Some pointers
 // (*last_y, *width etc.) can be NULL if corresponding information is not
-// needed.
-WEBP_EXTERN(uint8_t*) WebPIDecGetRGB(
+// needed. The values in these pointers are only valid on successful (non-NULL)
+// return.
+WEBP_EXTERN uint8_t* WebPIDecGetRGB(
     const WebPIDecoder* idec, int* last_y,
     int* width, int* height, int* stride);
 
 // Same as above function to get a YUVA image. Returns pointer to the luma
 // plane or NULL in case of error. If there is no alpha information
 // the alpha pointer '*a' will be returned NULL.
-WEBP_EXTERN(uint8_t*) WebPIDecGetYUVA(
+WEBP_EXTERN uint8_t* WebPIDecGetYUVA(
     const WebPIDecoder* idec, int* last_y,
     uint8_t** u, uint8_t** v, uint8_t** a,
     int* width, int* height, int* stride, int* uv_stride, int* a_stride);
@@ -368,7 +369,7 @@ static WEBP_INLINE uint8_t* WebPIDecGetYUV(
 // Returns NULL in case the incremental decoder object is in an invalid state.
 // Otherwise returns the pointer to the internal representation. This structure
 // is read-only, tied to WebPIDecoder's lifespan and should not be modified.
-WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
+WEBP_EXTERN const WebPDecBuffer* WebPIDecodedArea(
     const WebPIDecoder* idec, int* left, int* top, int* width, int* height);
 
 //------------------------------------------------------------------------------
@@ -416,7 +417,7 @@ struct WebPBitstreamFeatures {
 };
 
 // Internal, version-checked, entry point
-WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
+WEBP_EXTERN VP8StatusCode WebPGetFeaturesInternal(
     const uint8_t*, size_t, WebPBitstreamFeatures*, int);
 
 // Retrieve features from the bitstream. The *features structure is filled
@@ -457,7 +458,7 @@ struct WebPDecoderConfig {
 };
 
 // Internal, version-checked, entry point
-WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);
+WEBP_EXTERN int WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);
 
 // Initialize the configuration as empty. This function must always be
 // called first, unless WebPGetFeatures() is to be called.
@@ -477,14 +478,14 @@ static WEBP_INLINE int WebPInitDecoderConfig(WebPDecoderConfig* config) {
 // The return WebPIDecoder object must always be deleted calling WebPIDelete().
 // Returns NULL in case of error (and config->status will then reflect
 // the error condition, if available).
-WEBP_EXTERN(WebPIDecoder*) WebPIDecode(const uint8_t* data, size_t data_size,
-                                       WebPDecoderConfig* config);
+WEBP_EXTERN WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
+                                      WebPDecoderConfig* config);
 
 // Non-incremental version. This version decodes the full data at once, taking
 // 'config' into account. Returns decoding status (which should be VP8_STATUS_OK
 // if the decoding was successful). Note that 'config' cannot be NULL.
-WEBP_EXTERN(VP8StatusCode) WebPDecode(const uint8_t* data, size_t data_size,
-                                      WebPDecoderConfig* config);
+WEBP_EXTERN VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
+                                     WebPDecoderConfig* config);
 
 #ifdef __cplusplus
 }    // extern "C"
diff --git a/thirdparty/libwebp/webp/demux.h b/thirdparty/libwebp/src/webp/demux.h
index 454f6914b2..555d641338 100644
--- a/thirdparty/libwebp/webp/demux.h
+++ b/thirdparty/libwebp/src/webp/demux.h
@@ -71,7 +71,7 @@ typedef struct WebPAnimDecoderOptions WebPAnimDecoderOptions;
 
 // Returns the version number of the demux library, packed in hexadecimal using
 // 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
-WEBP_EXTERN(int) WebPGetDemuxVersion(void);
+WEBP_EXTERN int WebPGetDemuxVersion(void);
 
 //------------------------------------------------------------------------------
 // Life of a Demux object
@@ -85,7 +85,7 @@ typedef enum WebPDemuxState {
 } WebPDemuxState;
 
 // Internal, version-checked, entry point
-WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(
+WEBP_EXTERN WebPDemuxer* WebPDemuxInternal(
     const WebPData*, int, WebPDemuxState*, int);
 
 // Parses the full WebP file given by 'data'. For single images the WebP file
@@ -109,27 +109,32 @@ static WEBP_INLINE WebPDemuxer* WebPDemuxPartial(
 }
 
 // Frees memory associated with 'dmux'.
-WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
+WEBP_EXTERN void WebPDemuxDelete(WebPDemuxer* dmux);
 
 //------------------------------------------------------------------------------
 // Data/information extraction.
 
 typedef enum WebPFormatFeature {
-  WEBP_FF_FORMAT_FLAGS,  // Extended format flags present in the 'VP8X' chunk.
+  WEBP_FF_FORMAT_FLAGS,      // bit-wise combination of WebPFeatureFlags
+                             // corresponding to the 'VP8X' chunk (if present).
   WEBP_FF_CANVAS_WIDTH,
   WEBP_FF_CANVAS_HEIGHT,
-  WEBP_FF_LOOP_COUNT,
-  WEBP_FF_BACKGROUND_COLOR,
-  WEBP_FF_FRAME_COUNT    // Number of frames present in the demux object.
-                         // In case of a partial demux, this is the number of
-                         // frames seen so far, with the last frame possibly
-                         // being partial.
+  WEBP_FF_LOOP_COUNT,        // only relevant for animated file
+  WEBP_FF_BACKGROUND_COLOR,  // idem.
+  WEBP_FF_FRAME_COUNT        // Number of frames present in the demux object.
+                             // In case of a partial demux, this is the number
+                             // of frames seen so far, with the last frame
+                             // possibly being partial.
 } WebPFormatFeature;
 
 // Get the 'feature' value from the 'dmux'.
 // NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
 // returned a state > WEBP_DEMUX_PARSING_HEADER.
-WEBP_EXTERN(uint32_t) WebPDemuxGetI(
+// If 'feature' is WEBP_FF_FORMAT_FLAGS, the returned value is a bit-wise
+// combination of WebPFeatureFlags values.
+// If 'feature' is WEBP_FF_LOOP_COUNT, WEBP_FF_BACKGROUND_COLOR, the returned
+// value is only meaningful if the bitstream is animated.
+WEBP_EXTERN uint32_t WebPDemuxGetI(
     const WebPDemuxer* dmux, WebPFormatFeature feature);
 
 //------------------------------------------------------------------------------
@@ -159,20 +164,20 @@ struct WebPIterator {
 // Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
 // Call WebPDemuxReleaseIterator() when use of the iterator is complete.
 // NOTE: 'dmux' must persist for the lifetime of 'iter'.
-WEBP_EXTERN(int) WebPDemuxGetFrame(
+WEBP_EXTERN int WebPDemuxGetFrame(
     const WebPDemuxer* dmux, int frame_number, WebPIterator* iter);
 
 // Sets 'iter->fragment' to point to the next ('iter->frame_num' + 1) or
 // previous ('iter->frame_num' - 1) frame. These functions do not loop.
 // Returns true on success, false otherwise.
-WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter);
-WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter);
+WEBP_EXTERN int WebPDemuxNextFrame(WebPIterator* iter);
+WEBP_EXTERN int WebPDemuxPrevFrame(WebPIterator* iter);
 
 // Releases any memory associated with 'iter'.
 // Must be called before any subsequent calls to WebPDemuxGetChunk() on the same
 // iter. Also, must be called before destroying the associated WebPDemuxer with
 // WebPDemuxDelete().
-WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter);
+WEBP_EXTERN void WebPDemuxReleaseIterator(WebPIterator* iter);
 
 //------------------------------------------------------------------------------
 // Chunk iteration.
@@ -197,20 +202,20 @@ struct WebPChunkIterator {
 // payloads are accessed through WebPDemuxGetFrame() and related functions.
 // Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
 // NOTE: 'dmux' must persist for the lifetime of the iterator.
-WEBP_EXTERN(int) WebPDemuxGetChunk(const WebPDemuxer* dmux,
-                                   const char fourcc[4], int chunk_number,
-                                   WebPChunkIterator* iter);
+WEBP_EXTERN int WebPDemuxGetChunk(const WebPDemuxer* dmux,
+                                  const char fourcc[4], int chunk_number,
+                                  WebPChunkIterator* iter);
 
 // Sets 'iter->chunk' to point to the next ('iter->chunk_num' + 1) or previous
 // ('iter->chunk_num' - 1) chunk. These functions do not loop.
 // Returns true on success, false otherwise.
-WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter);
-WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter);
+WEBP_EXTERN int WebPDemuxNextChunk(WebPChunkIterator* iter);
+WEBP_EXTERN int WebPDemuxPrevChunk(WebPChunkIterator* iter);
 
 // Releases any memory associated with 'iter'.
 // Must be called before destroying the associated WebPDemuxer with
 // WebPDemuxDelete().
-WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
+WEBP_EXTERN void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
 
 //------------------------------------------------------------------------------
 // WebPAnimDecoder API
@@ -252,7 +257,7 @@ struct WebPAnimDecoderOptions {
 };
 
 // Internal, version-checked, entry point.
-WEBP_EXTERN(int) WebPAnimDecoderOptionsInitInternal(
+WEBP_EXTERN int WebPAnimDecoderOptionsInitInternal(
     WebPAnimDecoderOptions*, int);
 
 // Should always be called, to initialize a fresh WebPAnimDecoderOptions
@@ -266,7 +271,7 @@ static WEBP_INLINE int WebPAnimDecoderOptionsInit(
 }
 
 // Internal, version-checked, entry point.
-WEBP_EXTERN(WebPAnimDecoder*) WebPAnimDecoderNewInternal(
+WEBP_EXTERN WebPAnimDecoder* WebPAnimDecoderNewInternal(
     const WebPData*, const WebPAnimDecoderOptions*, int);
 
 // Creates and initializes a WebPAnimDecoder object.
@@ -301,8 +306,8 @@ struct WebPAnimInfo {
 //   info - (out) global information fetched from the animation.
 // Returns:
 //   True on success.
-WEBP_EXTERN(int) WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec,
-                                        WebPAnimInfo* info);
+WEBP_EXTERN int WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec,
+                                       WebPAnimInfo* info);
 
 // Fetch the next frame from 'dec' based on options supplied to
 // WebPAnimDecoderNew(). This will be a fully reconstructed canvas of size
@@ -316,8 +321,8 @@ WEBP_EXTERN(int) WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec,
 // Returns:
 //   False if any of the arguments are NULL, or if there is a parsing or
 //   decoding error, or if there are no more frames. Otherwise, returns true.
-WEBP_EXTERN(int) WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
-                                        uint8_t** buf, int* timestamp);
+WEBP_EXTERN int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
+                                       uint8_t** buf, int* timestamp);
 
 // Check if there are more frames left to decode.
 // Parameters:
@@ -325,7 +330,7 @@ WEBP_EXTERN(int) WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
 // Returns:
 //   True if 'dec' is not NULL and some frames are yet to be decoded.
 //   Otherwise, returns false.
-WEBP_EXTERN(int) WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec);
+WEBP_EXTERN int WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec);
 
 // Resets the WebPAnimDecoder object, so that next call to
 // WebPAnimDecoderGetNext() will restart decoding from 1st frame. This would be
@@ -333,7 +338,7 @@ WEBP_EXTERN(int) WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec);
 // info.loop_count times) without destroying and recreating the 'dec' object.
 // Parameters:
 //   dec - (in/out) decoder instance to be reset
-WEBP_EXTERN(void) WebPAnimDecoderReset(WebPAnimDecoder* dec);
+WEBP_EXTERN void WebPAnimDecoderReset(WebPAnimDecoder* dec);
 
 // Grab the internal demuxer object.
 // Getting the demuxer object can be useful if one wants to use operations only
@@ -343,13 +348,13 @@ WEBP_EXTERN(void) WebPAnimDecoderReset(WebPAnimDecoder* dec);
 //
 // Parameters:
 //   dec - (in) decoder instance from which the demuxer object is to be fetched.
-WEBP_EXTERN(const WebPDemuxer*) WebPAnimDecoderGetDemuxer(
+WEBP_EXTERN const WebPDemuxer* WebPAnimDecoderGetDemuxer(
     const WebPAnimDecoder* dec);
 
 // Deletes the WebPAnimDecoder object.
 // Parameters:
 //   dec - (in/out) decoder instance to be deleted
-WEBP_EXTERN(void) WebPAnimDecoderDelete(WebPAnimDecoder* dec);
+WEBP_EXTERN void WebPAnimDecoderDelete(WebPAnimDecoder* dec);
 
 #ifdef __cplusplus
 }    // extern "C"
diff --git a/thirdparty/libwebp/webp/encode.h b/thirdparty/libwebp/src/webp/encode.h
index 35fde1d052..7ec3543dc2 100644
--- a/thirdparty/libwebp/webp/encode.h
+++ b/thirdparty/libwebp/src/webp/encode.h
@@ -35,7 +35,7 @@ typedef struct WebPMemoryWriter WebPMemoryWriter;
 
 // Return the encoder's version number, packed in hexadecimal using 8bits for
 // each of major/minor/revision. E.g: v2.5.7 is 0x020507.
-WEBP_EXTERN(int) WebPGetEncoderVersion(void);
+WEBP_EXTERN int WebPGetEncoderVersion(void);
 
 //------------------------------------------------------------------------------
 // One-stop-shop call! No questions asked:
@@ -46,37 +46,37 @@ WEBP_EXTERN(int) WebPGetEncoderVersion(void);
 // These functions compress using the lossy format, and the quality_factor
 // can go from 0 (smaller output, lower quality) to 100 (best quality,
 // larger output).
-WEBP_EXTERN(size_t) WebPEncodeRGB(const uint8_t* rgb,
+WEBP_EXTERN size_t WebPEncodeRGB(const uint8_t* rgb,
+                                 int width, int height, int stride,
+                                 float quality_factor, uint8_t** output);
+WEBP_EXTERN size_t WebPEncodeBGR(const uint8_t* bgr,
+                                 int width, int height, int stride,
+                                 float quality_factor, uint8_t** output);
+WEBP_EXTERN size_t WebPEncodeRGBA(const uint8_t* rgba,
                                   int width, int height, int stride,
                                   float quality_factor, uint8_t** output);
-WEBP_EXTERN(size_t) WebPEncodeBGR(const uint8_t* bgr,
+WEBP_EXTERN size_t WebPEncodeBGRA(const uint8_t* bgra,
                                   int width, int height, int stride,
                                   float quality_factor, uint8_t** output);
-WEBP_EXTERN(size_t) WebPEncodeRGBA(const uint8_t* rgba,
-                                   int width, int height, int stride,
-                                   float quality_factor, uint8_t** output);
-WEBP_EXTERN(size_t) WebPEncodeBGRA(const uint8_t* bgra,
-                                   int width, int height, int stride,
-                                   float quality_factor, uint8_t** output);
 
 // These functions are the equivalent of the above, but compressing in a
 // lossless manner. Files are usually larger than lossy format, but will
 // not suffer any compression loss.
-WEBP_EXTERN(size_t) WebPEncodeLosslessRGB(const uint8_t* rgb,
+WEBP_EXTERN size_t WebPEncodeLosslessRGB(const uint8_t* rgb,
+                                         int width, int height, int stride,
+                                         uint8_t** output);
+WEBP_EXTERN size_t WebPEncodeLosslessBGR(const uint8_t* bgr,
+                                         int width, int height, int stride,
+                                         uint8_t** output);
+WEBP_EXTERN size_t WebPEncodeLosslessRGBA(const uint8_t* rgba,
                                           int width, int height, int stride,
                                           uint8_t** output);
-WEBP_EXTERN(size_t) WebPEncodeLosslessBGR(const uint8_t* bgr,
+WEBP_EXTERN size_t WebPEncodeLosslessBGRA(const uint8_t* bgra,
                                           int width, int height, int stride,
                                           uint8_t** output);
-WEBP_EXTERN(size_t) WebPEncodeLosslessRGBA(const uint8_t* rgba,
-                                           int width, int height, int stride,
-                                           uint8_t** output);
-WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra,
-                                           int width, int height, int stride,
-                                           uint8_t** output);
 
 // Releases memory returned by the WebPEncode*() functions above.
-WEBP_EXTERN(void) WebPFree(void* ptr);
+WEBP_EXTERN void WebPFree(void* ptr);
 
 //------------------------------------------------------------------------------
 // Coding parameters
@@ -93,12 +93,15 @@ typedef enum WebPImageHint {
 // Compression parameters.
 struct WebPConfig {
   int lossless;           // Lossless encoding (0=lossy(default), 1=lossless).
-  float quality;          // between 0 (smallest file) and 100 (biggest)
+  float quality;          // between 0 and 100. For lossy, 0 gives the smallest
+                          // size and 100 the largest. For lossless, this
+                          // parameter is the amount of effort put into the
+                          // compression: 0 is the fastest but gives larger
+                          // files compared to the slowest, but best, 100.
   int method;             // quality/speed trade-off (0=fast, 6=slower-better)
 
   WebPImageHint image_hint;  // Hint for image type (lossless only for now).
 
-  // Parameters related to lossy compression only:
   int target_size;        // if non-zero, set the desired target size in bytes.
                           // Takes precedence over the 'compression' parameter.
   float target_PSNR;      // if non-zero, specifies the minimal distortion to
@@ -159,7 +162,7 @@ typedef enum WebPPreset {
 } WebPPreset;
 
 // Internal, version-checked, entry point
-WEBP_EXTERN(int) WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
+WEBP_EXTERN int WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
 
 // Should always be called, to initialize a fresh WebPConfig structure before
 // modification. Returns false in case of version mismatch. WebPConfigInit()
@@ -186,15 +189,15 @@ static WEBP_INLINE int WebPConfigPreset(WebPConfig* config,
 // speed and final compressed size.
 // This function will overwrite several fields from config: 'method', 'quality'
 // and 'lossless'. Returns false in case of parameter error.
-WEBP_EXTERN(int) WebPConfigLosslessPreset(WebPConfig* config, int level);
+WEBP_EXTERN int WebPConfigLosslessPreset(WebPConfig* config, int level);
 
 // Returns true if 'config' is non-NULL and all configuration parameters are
 // within their valid ranges.
-WEBP_EXTERN(int) WebPValidateConfig(const WebPConfig* config);
+WEBP_EXTERN int WebPValidateConfig(const WebPConfig* config);
 
 //------------------------------------------------------------------------------
 // Input / Output
-// Structure for storing auxiliary statistics (mostly for lossy encoding).
+// Structure for storing auxiliary statistics.
 
 struct WebPAuxStats {
   int coded_size;         // final size
@@ -242,16 +245,16 @@ struct WebPMemoryWriter {
 };
 
 // The following must be called first before any use.
-WEBP_EXTERN(void) WebPMemoryWriterInit(WebPMemoryWriter* writer);
+WEBP_EXTERN void WebPMemoryWriterInit(WebPMemoryWriter* writer);
 
 // The following must be called to deallocate writer->mem memory. The 'writer'
 // object itself is not deallocated.
-WEBP_EXTERN(void) WebPMemoryWriterClear(WebPMemoryWriter* writer);
+WEBP_EXTERN void WebPMemoryWriterClear(WebPMemoryWriter* writer);
 // The custom writer to be used with WebPMemoryWriter as custom_ptr. Upon
 // completion, writer.mem and writer.size will hold the coded data.
 // writer.mem must be freed by calling WebPMemoryWriterClear.
-WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
-                                 const WebPPicture* picture);
+WEBP_EXTERN int WebPMemoryWrite(const uint8_t* data, size_t data_size,
+                                const WebPPicture* picture);
 
 // Progress hook, called from time to time to report progress. It can return
 // false to request an abort of the encoding process, or true otherwise if
@@ -354,7 +357,7 @@ struct WebPPicture {
 };
 
 // Internal, version-checked, entry point
-WEBP_EXTERN(int) WebPPictureInitInternal(WebPPicture*, int);
+WEBP_EXTERN int WebPPictureInitInternal(WebPPicture*, int);
 
 // Should always be called, to initialize the structure. Returns false in case
 // of version mismatch. WebPPictureInit() must have succeeded before using the
@@ -371,20 +374,20 @@ static WEBP_INLINE int WebPPictureInit(WebPPicture* picture) {
 // Allocate y/u/v buffers as per colorspace/width/height specification.
 // Note! This function will free the previous buffer if needed.
 // Returns false in case of memory error.
-WEBP_EXTERN(int) WebPPictureAlloc(WebPPicture* picture);
+WEBP_EXTERN int WebPPictureAlloc(WebPPicture* picture);
 
 // Release the memory allocated by WebPPictureAlloc() or WebPPictureImport*().
 // Note that this function does _not_ free the memory used by the 'picture'
 // object itself.
 // Besides memory (which is reclaimed) all other fields of 'picture' are
 // preserved.
-WEBP_EXTERN(void) WebPPictureFree(WebPPicture* picture);
+WEBP_EXTERN void WebPPictureFree(WebPPicture* picture);
 
 // Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return, *dst
 // will fully own the copied pixels (this is not a view). The 'dst' picture need
 // not be initialized as its content is overwritten.
 // Returns false in case of memory allocation error.
-WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
+WEBP_EXTERN int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
 
 // Compute the single distortion for packed planes of samples.
 // 'src' will be compared to 'ref', and the raw distortion stored into
@@ -393,19 +396,19 @@ WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
 // 'x_step' is the horizontal stride (in bytes) between samples.
 // 'src/ref_stride' is the byte distance between rows.
 // Returns false in case of error (bad parameter, memory allocation error, ...).
-WEBP_EXTERN(int) WebPPlaneDistortion(const uint8_t* src, size_t src_stride,
-                                     const uint8_t* ref, size_t ref_stride,
-                                     int width, int height,
-                                     size_t x_step,
-                                     int type,   // 0 = PSNR, 1 = SSIM, 2 = LSIM
-                                     float* distortion, float* result);
+WEBP_EXTERN int WebPPlaneDistortion(const uint8_t* src, size_t src_stride,
+                                    const uint8_t* ref, size_t ref_stride,
+                                    int width, int height,
+                                    size_t x_step,
+                                    int type,   // 0 = PSNR, 1 = SSIM, 2 = LSIM
+                                    float* distortion, float* result);
 
 // Compute PSNR, SSIM or LSIM distortion metric between two pictures. Results
 // are in dB, stored in result[] in the B/G/R/A/All order. The distortion is
 // always performed using ARGB samples. Hence if the input is YUV(A), the
 // picture will be internally converted to ARGB (just for the measurement).
 // Warning: this function is rather CPU-intensive.
-WEBP_EXTERN(int) WebPPictureDistortion(
+WEBP_EXTERN int WebPPictureDistortion(
     const WebPPicture* src, const WebPPicture* ref,
     int metric_type,           // 0 = PSNR, 1 = SSIM, 2 = LSIM
     float result[5]);
@@ -418,8 +421,8 @@ WEBP_EXTERN(int) WebPPictureDistortion(
 // must be fully be comprised inside the 'src' source picture. If the source
 // picture uses the YUV420 colorspace, the top and left coordinates will be
 // snapped to even values.
-WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture,
-                                 int left, int top, int width, int height);
+WEBP_EXTERN int WebPPictureCrop(WebPPicture* picture,
+                                int left, int top, int width, int height);
 
 // Extracts a view from 'src' picture into 'dst'. The rectangle for the view
 // is defined by the top-left corner pixel coordinates (left, top) as well
@@ -432,42 +435,42 @@ WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture,
 // with WebPPictureInit() if it is different from 'src', since its content will
 // be overwritten.
 // Returns false in case of memory allocation error or invalid parameters.
-WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src,
-                                 int left, int top, int width, int height,
-                                 WebPPicture* dst);
+WEBP_EXTERN int WebPPictureView(const WebPPicture* src,
+                                int left, int top, int width, int height,
+                                WebPPicture* dst);
 
 // Returns true if the 'picture' is actually a view and therefore does
 // not own the memory for pixels.
-WEBP_EXTERN(int) WebPPictureIsView(const WebPPicture* picture);
+WEBP_EXTERN int WebPPictureIsView(const WebPPicture* picture);
 
 // Rescale a picture to new dimension width x height.
 // If either 'width' or 'height' (but not both) is 0 the corresponding
 // dimension will be calculated preserving the aspect ratio.
 // No gamma correction is applied.
 // Returns false in case of error (invalid parameter or insufficient memory).
-WEBP_EXTERN(int) WebPPictureRescale(WebPPicture* pic, int width, int height);
+WEBP_EXTERN int WebPPictureRescale(WebPPicture* pic, int width, int height);
 
 // Colorspace conversion function to import RGB samples.
 // Previous buffer will be free'd, if any.
 // *rgb buffer should have a size of at least height * rgb_stride.
 // Returns false in case of memory error.
-WEBP_EXTERN(int) WebPPictureImportRGB(
+WEBP_EXTERN int WebPPictureImportRGB(
     WebPPicture* picture, const uint8_t* rgb, int rgb_stride);
 // Same, but for RGBA buffer.
-WEBP_EXTERN(int) WebPPictureImportRGBA(
+WEBP_EXTERN int WebPPictureImportRGBA(
     WebPPicture* picture, const uint8_t* rgba, int rgba_stride);
 // Same, but for RGBA buffer. Imports the RGB direct from the 32-bit format
 // input buffer ignoring the alpha channel. Avoids needing to copy the data
 // to a temporary 24-bit RGB buffer to import the RGB only.
-WEBP_EXTERN(int) WebPPictureImportRGBX(
+WEBP_EXTERN int WebPPictureImportRGBX(
     WebPPicture* picture, const uint8_t* rgbx, int rgbx_stride);
 
 // Variants of the above, but taking BGR(A|X) input.
-WEBP_EXTERN(int) WebPPictureImportBGR(
+WEBP_EXTERN int WebPPictureImportBGR(
     WebPPicture* picture, const uint8_t* bgr, int bgr_stride);
-WEBP_EXTERN(int) WebPPictureImportBGRA(
+WEBP_EXTERN int WebPPictureImportBGRA(
     WebPPicture* picture, const uint8_t* bgra, int bgra_stride);
-WEBP_EXTERN(int) WebPPictureImportBGRX(
+WEBP_EXTERN int WebPPictureImportBGRX(
     WebPPicture* picture, const uint8_t* bgrx, int bgrx_stride);
 
 // Converts picture->argb data to the YUV420A format. The 'colorspace'
@@ -476,14 +479,14 @@ WEBP_EXTERN(int) WebPPictureImportBGRX(
 // non-opaque transparent values is detected, and 'colorspace' will be
 // adjusted accordingly. Note that this method is lossy.
 // Returns false in case of error.
-WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture,
-                                       WebPEncCSP /*colorspace = WEBP_YUV420*/);
+WEBP_EXTERN int WebPPictureARGBToYUVA(WebPPicture* picture,
+                                      WebPEncCSP /*colorspace = WEBP_YUV420*/);
 
 // Same as WebPPictureARGBToYUVA(), but the conversion is done using
 // pseudo-random dithering with a strength 'dithering' between
 // 0.0 (no dithering) and 1.0 (maximum dithering). This is useful
 // for photographic picture.
-WEBP_EXTERN(int) WebPPictureARGBToYUVADithered(
+WEBP_EXTERN int WebPPictureARGBToYUVADithered(
     WebPPicture* picture, WebPEncCSP colorspace, float dithering);
 
 // Performs 'sharp' RGBA->YUVA420 downsampling and colorspace conversion.
@@ -491,9 +494,9 @@ WEBP_EXTERN(int) WebPPictureARGBToYUVADithered(
 // method is roughly 2x slower than WebPPictureARGBToYUVA() but produces better
 // and sharper YUV representation.
 // Returns false in case of error.
-WEBP_EXTERN(int) WebPPictureSharpARGBToYUVA(WebPPicture* picture);
+WEBP_EXTERN int WebPPictureSharpARGBToYUVA(WebPPicture* picture);
 // kept for backward compatibility:
-WEBP_EXTERN(int) WebPPictureSmartARGBToYUVA(WebPPicture* picture);
+WEBP_EXTERN int WebPPictureSmartARGBToYUVA(WebPPicture* picture);
 
 // Converts picture->yuv to picture->argb and sets picture->use_argb to true.
 // The input format must be YUV_420 or YUV_420A. The conversion from YUV420 to
@@ -501,22 +504,22 @@ WEBP_EXTERN(int) WebPPictureSmartARGBToYUVA(WebPPicture* picture);
 // Note that the use of this colorspace is discouraged if one has access to the
 // raw ARGB samples, since using YUV420 is comparatively lossy.
 // Returns false in case of error.
-WEBP_EXTERN(int) WebPPictureYUVAToARGB(WebPPicture* picture);
+WEBP_EXTERN int WebPPictureYUVAToARGB(WebPPicture* picture);
 
 // Helper function: given a width x height plane of RGBA or YUV(A) samples
-// clean-up the YUV or RGB samples under fully transparent area, to help
-// compressibility (no guarantee, though).
-WEBP_EXTERN(void) WebPCleanupTransparentArea(WebPPicture* picture);
+// clean-up or smoothen the YUV or RGB samples under fully transparent area,
+// to help compressibility (no guarantee, though).
+WEBP_EXTERN void WebPCleanupTransparentArea(WebPPicture* picture);
 
 // Scan the picture 'picture' for the presence of non fully opaque alpha values.
 // Returns true in such case. Otherwise returns false (indicating that the
 // alpha plane can be ignored altogether e.g.).
-WEBP_EXTERN(int) WebPPictureHasTransparency(const WebPPicture* picture);
+WEBP_EXTERN int WebPPictureHasTransparency(const WebPPicture* picture);
 
 // Remove the transparency information (if present) by blending the color with
 // the background color 'background_rgb' (specified as 24bit RGB triplet).
 // After this call, all alpha values are reset to 0xff.
-WEBP_EXTERN(void) WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
+WEBP_EXTERN void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
 
 //------------------------------------------------------------------------------
 // Main call
@@ -531,7 +534,7 @@ WEBP_EXTERN(void) WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
 // the former for lossy encoding, and the latter for lossless encoding
 // (when config.lossless is true). Automatic conversion from one format to
 // another is provided but they both incur some loss.
-WEBP_EXTERN(int) WebPEncode(const WebPConfig* config, WebPPicture* picture);
+WEBP_EXTERN int WebPEncode(const WebPConfig* config, WebPPicture* picture);
 
 //------------------------------------------------------------------------------
 
diff --git a/thirdparty/libwebp/webp/format_constants.h b/thirdparty/libwebp/src/webp/format_constants.h
index 329fc8a3b0..329fc8a3b0 100644
--- a/thirdparty/libwebp/webp/format_constants.h
+++ b/thirdparty/libwebp/src/webp/format_constants.h
diff --git a/thirdparty/libwebp/webp/mux.h b/thirdparty/libwebp/src/webp/mux.h
index daccc65e86..28bb4a41c9 100644
--- a/thirdparty/libwebp/webp/mux.h
+++ b/thirdparty/libwebp/src/webp/mux.h
@@ -98,13 +98,13 @@ typedef enum WebPChunkId {
 
 // Returns the version number of the mux library, packed in hexadecimal using
 // 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
-WEBP_EXTERN(int) WebPGetMuxVersion(void);
+WEBP_EXTERN int WebPGetMuxVersion(void);
 
 //------------------------------------------------------------------------------
 // Life of a Mux object
 
 // Internal, version-checked, entry point
-WEBP_EXTERN(WebPMux*) WebPNewInternal(int);
+WEBP_EXTERN WebPMux* WebPNewInternal(int);
 
 // Creates an empty mux object.
 // Returns:
@@ -117,13 +117,13 @@ static WEBP_INLINE WebPMux* WebPMuxNew(void) {
 // Deletes the mux object.
 // Parameters:
 //   mux - (in/out) object to be deleted
-WEBP_EXTERN(void) WebPMuxDelete(WebPMux* mux);
+WEBP_EXTERN void WebPMuxDelete(WebPMux* mux);
 
 //------------------------------------------------------------------------------
 // Mux creation.
 
 // Internal, version-checked, entry point
-WEBP_EXTERN(WebPMux*) WebPMuxCreateInternal(const WebPData*, int, int);
+WEBP_EXTERN WebPMux* WebPMuxCreateInternal(const WebPData*, int, int);
 
 // Creates a mux object from raw data given in WebP RIFF format.
 // Parameters:
@@ -160,7 +160,7 @@ static WEBP_INLINE WebPMux* WebPMuxCreate(const WebPData* bitstream,
 //                               or if fourcc corresponds to an image chunk.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxSetChunk(
+WEBP_EXTERN WebPMuxError WebPMuxSetChunk(
     WebPMux* mux, const char fourcc[4], const WebPData* chunk_data,
     int copy_data);
 
@@ -176,7 +176,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxSetChunk(
 //                               or if fourcc corresponds to an image chunk.
 //   WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given id.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetChunk(
+WEBP_EXTERN WebPMuxError WebPMuxGetChunk(
     const WebPMux* mux, const char fourcc[4], WebPData* chunk_data);
 
 // Deletes the chunk with the given 'fourcc' from the mux object.
@@ -189,7 +189,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetChunk(
 //                               or if fourcc corresponds to an image chunk.
 //   WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given fourcc.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxDeleteChunk(
+WEBP_EXTERN WebPMuxError WebPMuxDeleteChunk(
     WebPMux* mux, const char fourcc[4]);
 
 //------------------------------------------------------------------------------
@@ -222,7 +222,7 @@ struct WebPMuxFrameInfo {
 //   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(
+WEBP_EXTERN WebPMuxError WebPMuxSetImage(
     WebPMux* mux, const WebPData* bitstream, int copy_data);
 
 // Adds a frame at the end of the mux object.
@@ -241,7 +241,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(
 //                               or if content of 'frame' is invalid.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxPushFrame(
+WEBP_EXTERN WebPMuxError WebPMuxPushFrame(
     WebPMux* mux, const WebPMuxFrameInfo* frame, int copy_data);
 
 // Gets the nth frame from the mux object.
@@ -259,7 +259,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxPushFrame(
 //   WEBP_MUX_BAD_DATA - if nth frame chunk in mux is invalid.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetFrame(
+WEBP_EXTERN WebPMuxError WebPMuxGetFrame(
     const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame);
 
 // Deletes a frame from the mux object.
@@ -272,7 +272,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetFrame(
 //   WEBP_MUX_NOT_FOUND - If there are less than nth frames in the mux object
 //                        before deletion.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth);
+WEBP_EXTERN WebPMuxError WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth);
 
 //------------------------------------------------------------------------------
 // Animation.
@@ -296,7 +296,7 @@ struct WebPMuxAnimParams {
 //   WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxSetAnimationParams(
+WEBP_EXTERN WebPMuxError WebPMuxSetAnimationParams(
     WebPMux* mux, const WebPMuxAnimParams* params);
 
 // Gets the animation parameters from the mux object.
@@ -307,7 +307,7 @@ WEBP_EXTERN(WebPMuxError) WebPMuxSetAnimationParams(
 //   WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
 //   WEBP_MUX_NOT_FOUND - if ANIM chunk is not present in mux object.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetAnimationParams(
+WEBP_EXTERN WebPMuxError WebPMuxGetAnimationParams(
     const WebPMux* mux, WebPMuxAnimParams* params);
 
 //------------------------------------------------------------------------------
@@ -328,8 +328,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetAnimationParams(
 //   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL; or
 //                               width or height are invalid or out of bounds
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxSetCanvasSize(WebPMux* mux,
-                                               int width, int height);
+WEBP_EXTERN WebPMuxError WebPMuxSetCanvasSize(WebPMux* mux,
+                                              int width, int height);
 
 // Gets the canvas size from the mux object.
 // Note: This method assumes that the VP8X chunk, if present, is up-to-date.
@@ -343,8 +343,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxSetCanvasSize(WebPMux* mux,
 //   WEBP_MUX_INVALID_ARGUMENT - if mux, width or height is NULL.
 //   WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetCanvasSize(const WebPMux* mux,
-                                               int* width, int* height);
+WEBP_EXTERN WebPMuxError WebPMuxGetCanvasSize(const WebPMux* mux,
+                                              int* width, int* height);
 
 // Gets the feature flags from the mux object.
 // Note: This method assumes that the VP8X chunk, if present, is up-to-date.
@@ -359,8 +359,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetCanvasSize(const WebPMux* mux,
 //   WEBP_MUX_INVALID_ARGUMENT - if mux or flags is NULL.
 //   WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetFeatures(const WebPMux* mux,
-                                             uint32_t* flags);
+WEBP_EXTERN WebPMuxError WebPMuxGetFeatures(const WebPMux* mux,
+                                            uint32_t* flags);
 
 // Gets number of chunks with the given 'id' in the mux object.
 // Parameters:
@@ -370,8 +370,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxGetFeatures(const WebPMux* mux,
 // Returns:
 //   WEBP_MUX_INVALID_ARGUMENT - if mux, or num_elements is NULL.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux,
-                                           WebPChunkId id, int* num_elements);
+WEBP_EXTERN WebPMuxError WebPMuxNumChunks(const WebPMux* mux,
+                                          WebPChunkId id, int* num_elements);
 
 // Assembles all chunks in WebP RIFF format and returns in 'assembled_data'.
 // This function also validates the mux object.
@@ -388,8 +388,8 @@ WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux,
 //   WEBP_MUX_INVALID_ARGUMENT - if mux or assembled_data is NULL.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxAssemble(WebPMux* mux,
-                                          WebPData* assembled_data);
+WEBP_EXTERN WebPMuxError WebPMuxAssemble(WebPMux* mux,
+                                         WebPData* assembled_data);
 
 //------------------------------------------------------------------------------
 // WebPAnimEncoder API
@@ -442,7 +442,7 @@ struct WebPAnimEncoderOptions {
 };
 
 // Internal, version-checked, entry point.
-WEBP_EXTERN(int) WebPAnimEncoderOptionsInitInternal(
+WEBP_EXTERN int WebPAnimEncoderOptionsInitInternal(
     WebPAnimEncoderOptions*, int);
 
 // Should always be called, to initialize a fresh WebPAnimEncoderOptions
@@ -455,7 +455,7 @@ static WEBP_INLINE int WebPAnimEncoderOptionsInit(
 }
 
 // Internal, version-checked, entry point.
-WEBP_EXTERN(WebPAnimEncoder*) WebPAnimEncoderNewInternal(
+WEBP_EXTERN WebPAnimEncoder* WebPAnimEncoderNewInternal(
     int, int, const WebPAnimEncoderOptions*, int);
 
 // Creates and initializes a WebPAnimEncoder object.
@@ -490,7 +490,7 @@ static WEBP_INLINE WebPAnimEncoder* WebPAnimEncoderNew(
 // Returns:
 //   On error, returns false and frame->error_code is set appropriately.
 //   Otherwise, returns true.
-WEBP_EXTERN(int) WebPAnimEncoderAdd(
+WEBP_EXTERN int WebPAnimEncoderAdd(
     WebPAnimEncoder* enc, struct WebPPicture* frame, int timestamp_ms,
     const struct WebPConfig* config);
 
@@ -503,8 +503,8 @@ WEBP_EXTERN(int) WebPAnimEncoderAdd(
 //   webp_data - (out) generated WebP bitstream.
 // Returns:
 //   True on success.
-WEBP_EXTERN(int) WebPAnimEncoderAssemble(WebPAnimEncoder* enc,
-                                         WebPData* webp_data);
+WEBP_EXTERN int WebPAnimEncoderAssemble(WebPAnimEncoder* enc,
+                                        WebPData* webp_data);
 
 // Get error string corresponding to the most recent call using 'enc'. The
 // returned string is owned by 'enc' and is valid only until the next call to
@@ -514,12 +514,12 @@ WEBP_EXTERN(int) WebPAnimEncoderAssemble(WebPAnimEncoder* enc,
 // Returns:
 //   NULL if 'enc' is NULL. Otherwise, returns the error string if the last call
 //   to 'enc' had an error, or an empty string if the last call was a success.
-WEBP_EXTERN(const char*) WebPAnimEncoderGetError(WebPAnimEncoder* enc);
+WEBP_EXTERN const char* WebPAnimEncoderGetError(WebPAnimEncoder* enc);
 
 // Deletes the WebPAnimEncoder object.
 // Parameters:
 //   enc - (in/out) object to be deleted
-WEBP_EXTERN(void) WebPAnimEncoderDelete(WebPAnimEncoder* enc);
+WEBP_EXTERN void WebPAnimEncoderDelete(WebPAnimEncoder* enc);
 
 //------------------------------------------------------------------------------
 
diff --git a/thirdparty/libwebp/webp/mux_types.h b/thirdparty/libwebp/src/webp/mux_types.h
index b37e2c67aa..b37e2c67aa 100644
--- a/thirdparty/libwebp/webp/mux_types.h
+++ b/thirdparty/libwebp/src/webp/mux_types.h
diff --git a/thirdparty/libwebp/webp/types.h b/thirdparty/libwebp/src/webp/types.h
index 98fff35a11..989a763f0d 100644
--- a/thirdparty/libwebp/webp/types.h
+++ b/thirdparty/libwebp/src/webp/types.h
@@ -40,9 +40,9 @@ typedef long long int int64_t;
 // This explicitly marks library functions and allows for changing the
 // signature for e.g., Windows DLL builds.
 # if defined(__GNUC__) && __GNUC__ >= 4
-#  define WEBP_EXTERN(type) extern __attribute__ ((visibility ("default"))) type
+#  define WEBP_EXTERN extern __attribute__ ((visibility ("default")))
 # else
-#  define WEBP_EXTERN(type) extern type
+#  define WEBP_EXTERN extern
 # endif  /* __GNUC__ >= 4 */
 #endif  /* WEBP_EXTERN */